Introduction

This is a brief workflow highlighting the exploratory analysis of survey data mined to assist in the writing of the manuscript, “Gender Disparities Persist in Endoscopy Suite” (Rabinowitz, et al.). Where appropriate, samples of the exact R syntax used will be displayed, along with the corresponding output (tabular data, graphical plots, maps, etc.).




DATA CLEAN-UP


require(broom)
require(dplyr)

SURVEY  <-
    GENDER_DIFF_DATA_LABELS %>% 
    filter( COMPLETE != "Incomplete" &
            BIRTHSEX != "OTHER" &
            !is.na(BIRTHSEX) ) %>% 
   select( RECORD_ID, BIRTHSEX, GENDER, RACE_SOUTHASIAN:RACE_OTHER, AGE, TRAINING_LEVEL, HEIGHT, GLOVE, GLOVE_SIZE_AVAILABLE, PERFORMANCE_HOURS, TEACHER_GENDER_PREFERENCE,
           FEMALE_TRAINERS, MALE_TRAINERS, EVER_INJURED, EXPERIENCED_TRANSIENT_PAIN_NO, EXPERIENCED_TRANSIENT_PAIN_HAND, EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER,
           EXPERIENCED_TRANSIENT_PAIN_BACK, EXPERIENCED_TRANSIENT_PAIN_LEG, EXPERIENCED_TRANSIENT_PAIN_FOOT, GROWING_PAINS,
           FELLOWSHIP_PREGNANCY, starts_with( c("PREGNANCY", "POSTPARTUM")),
           FELLOWSHIP_FORMAL_ERGO_TRAINING, INFORMAL_TRAINING, TRAINING_TECHNIQUES_POSTURAL, TRAINING_TECHNIQUES_BEDHEIGHT, TRAINING_TECHNIQUES_BEDANGLE,
           TRAINING_TECHNIQUES_MONITORHEIGHT, TRAINING_TECHNIQUES_MUSCULOSKELETAL, TRAINING_TECHNIQUES_EXERCISE_STRETCHING, TRAINING_TECHNIQUES_DIAL_EXTENDERS,
           TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE, ERGO_TRAINING_BUDGET, ERGO_FEEDBACK, ERGO_FEEDBACK_BY_WHOM, ERGO_OPTIMIZATION, GLOVE_SIZE_AVAILABLE,
           DIAL_EXTENDERS_AVAILABLE, DIAL_EXTENDERS_ENCOURAGED, DIAL_EXTENDERS_FEMALEATT, DIAL_EXTENDERS_MALEATT, PEDI_COLONOSCOPES_AVAILABLE,
           LEAD_APRONS_AVAILABLE,  LEAD_APRONS_LW_ONEPIECE, LEAD_APRONS_LW_TWOPIECE, LEAD_APRONS_STANDARD_ONEPIECE, LEAD_APRONS_STANDARD_TWOPIECE,
           LEAD_APRONS_DOUBLE, LEAD_APRONS_THYROID, LEAD_APRONS_MATERNALDOS, LEAD_APRONS_FETALDOS,
           ERGO_FORMAL_TIMEOUT_PRIOR, ERGO_INFORMAL_TIMEOUT_PRIOR, MONITORS_ADJUSTABLE, TEACHER_SENSITIVITY_STATURE_HANDSIZE,
           TEACHER_SENSITIVITY_BY_GENDER, TACTILE_INSTRUCTION_FROM_MALES, TACTILE_INSTRUCTION_FROM_FEMALES,
           COMFORTABLE_ASKING_NURSES, ASK_NURSES_ONCE, ASK_NURSES_TWICE, ASK_NURSES_MORE,
           COMFORTABLE_ASKING_TECHS, MALE_ATTENDINGS_ASKING, FEMALE_ATTENDINGS_ASKING,
           RECOGNIZED_RESPECTED_ES_STAFF, RECOGNIZED_RESPECTED_ANESTHETISTS, RECOGNIZED_RESPECTED_GASTRO_ATTENDING, FIRST_NAME_NO_PERMISSION, 
           ERGO_TRAINING_MANDATORY, ERGO_OPTIMIZAITON_BUDGET_REQUIRED, EXPERIENCE_IMPROVED_DIAL_EXTENDERS, EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES, EXPERIENCE_IMPROVED_APRONS,
           ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED,
           ERGONOMIC_IMPORTANCE, MITIGATING_MUSCLE_STRAIN, BED_POSITION, ENDO_TRAINER_POSITION, WHEN_DISABILITY_INSURANCE) %>% 
  
    mutate(AGE2 = ifelse( AGE %in% c('< 30', '30-34', '35-40'), AGE, '> 40' )) %>% 
  
  
    mutate( RACE = ifelse( RACE_HISPANIC == "Y", "HISPANIC",
                   ifelse( RACE_WHITE == "Y", "WHITE",
                   ifelse( RACE_BLACK == "Y", "BLACK",
                   ifelse (RACE_SOUTHASIAN == "Y", "ASIAN SOUTH",
                   ifelse (RACE_EASTASIAN == "Y", "ASIAN EAST",
                   ifelse (RACE_NATIVEAMER == "Y", "OTHER",
                   ifelse (RACE_PACIFICISLAND == "Y", "OTHER",
                   ifelse (RACE_OTHER == "Y", "OTHER", "OTHER" )))))))),
            RACE = factor(RACE, levels= c('ASIAN EAST', 'ASIAN SOUTH', 'BLACK', 'HISPANIC', 'WHITE', 'OTHER'))) %>%
  
    mutate( BIRTHSEX = factor( BIRTHSEX,  levels= c("F","M") ),
            GENDER =   factor( toupper(GENDER), levels= c('WOMAN','MAN','OTHER' )))  %>% 
    mutate (AGE2 = factor(AGE2, levels = c('< 30', '30-34', '35-40', '> 40'))) %>%  

    mutate (RACE2 = case_when( RACE != "WHITE" ~ 'NON-WHITE',
                               TRUE ~ 'WHITE'),
            RACE2 = factor(RACE2, levels = c("WHITE", "NON-WHITE"))) %>% 
            

                           
                           
    mutate( TRAINING_LEVEL = factor (TRAINING_LEVEL, levels= c('First year fellow','Second year fellow', 'Third year fellow', 'Advanced fellow'))) %>% 
    mutate( TRAINING_LEVEL = recode_factor( TRAINING_LEVEL, 'First year fellow'= 'First Year', 
                                                            'Second year fellow'= 'Second Year', 
                                                            'Third year fellow' = 'Third Year', 
                                                            'Advanced fellow' = "Advanced", .ordered = T) ) %>% 
   mutate( HEIGHT2 = factor(HEIGHT, levels= c("< 5'", "5-5'3", "5'4-5'6", "5'7-5'9", "5'10-6'", "6'1-6'4", "> 6'4"))) %>% 
   mutate( PERFORMANCE_HOURS = factor(PERFORMANCE_HOURS),
           PERFORMANCE_HOURS = recode_factor(PERFORMANCE_HOURS, "< 10" = "< 10",
                                                               "10-20" = "10-20",
                                                               "21-30" = "21-30",
                                                               "31-40" = "31-40",
                                                               .default = "> 40")) %>% 
  
  mutate(TEACHER_GENDER_PREFERENCE = factor(TEACHER_GENDER_PREFERENCE),
         TEACHER_GENDER_PREFERENCE = recode_factor(TEACHER_GENDER_PREFERENCE, "Yes" = "Yes",
                                                                      .default = "No")) %>% 
  
  mutate( FEMALE_TRAINERS = factor(FEMALE_TRAINERS),
          FEMALE_TRAINERS = recode_factor(FEMALE_TRAINERS, 'None' = 'None',
                                                           '1-2' = '1-2',
                                                           '3-5' = '3-5',
                                                           '6-10' = '6-10',
                                                           .default = '> 10' )) %>% 
  mutate( MALE_TRAINERS = factor(MALE_TRAINERS),
          MALE_TRAINERS = recode_factor(MALE_TRAINERS, 'None' = 'None',
                                                           '1-2' = '1-2',
                                                           '3-5' = '3-5',
                                                           '6-10' = '6-10',
                                                          .default = "> 10") ) %>% 
  
  mutate( EVER_INJURED = factor(EVER_INJURED)) %>% 
  mutate( EXPERIENCED_TRANSIENT_PAIN_NO = factor(EXPERIENCED_TRANSIENT_PAIN_NO)) %>% 
  mutate( EXPERIENCED_TRANSIENT_PAIN_HAND = factor(EXPERIENCED_TRANSIENT_PAIN_HAND)) %>% 
  mutate( EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER = factor(EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER)) %>% 
  mutate( EXPERIENCED_TRANSIENT_PAIN_BACK = factor(EXPERIENCED_TRANSIENT_PAIN_BACK)) %>% 
  mutate( EXPERIENCED_TRANSIENT_PAIN_LEG = factor(EXPERIENCED_TRANSIENT_PAIN_LEG)) %>% 
  mutate( EXPERIENCED_TRANSIENT_PAIN_FOOT = factor(EXPERIENCED_TRANSIENT_PAIN_FOOT))  %>% 
  
  mutate( FELLOWSHIP_PREGNANCY = factor(FELLOWSHIP_PREGNANCY),
          PREGNANCY_ERGO_DIFFICULTY = factor(PREGNANCY_ERGO_DIFFICULTY),
          PREGNANCY_ERGO_INJURY = factor( PREGNANCY_ERGO_INJURY),
          POSTPARTUM_ERGO_INJURY = factor( POSTPARTUM_ERGO_INJURY)) %>% 
  
  mutate( GROWING_PAINS = factor(GROWING_PAINS)) %>% 
  mutate( FELLOWSHIP_FORMAL_ERGO_TRAINING = factor(FELLOWSHIP_FORMAL_ERGO_TRAINING)) %>% 
  mutate( INFORMAL_TRAINING = factor(INFORMAL_TRAINING)) %>% 
  mutate( TRAINING_TECHNIQUES_POSTURAL = factor(TRAINING_TECHNIQUES_POSTURAL)) %>% 
  mutate( TRAINING_TECHNIQUES_BEDHEIGHT = factor(TRAINING_TECHNIQUES_BEDHEIGHT)) %>% 
  mutate( TRAINING_TECHNIQUES_BEDANGLE = factor(TRAINING_TECHNIQUES_BEDANGLE)) %>% 
  mutate( TRAINING_TECHNIQUES_MONITORHEIGHT = factor(TRAINING_TECHNIQUES_MONITORHEIGHT)) %>% 
  mutate( TRAINING_TECHNIQUES_MUSCULOSKELETAL = factor(TRAINING_TECHNIQUES_MUSCULOSKELETAL)) %>% 
  mutate( TRAINING_TECHNIQUES_EXERCISE_STRETCHING = factor(TRAINING_TECHNIQUES_EXERCISE_STRETCHING)) %>% 
  mutate( TRAINING_TECHNIQUES_DIAL_EXTENDERS = factor(TRAINING_TECHNIQUES_DIAL_EXTENDERS)) %>% 
  mutate( TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE = factor(TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE)) %>% 
  
  mutate( ERGO_TRAINING_BUDGET = factor(ERGO_TRAINING_BUDGET),
          ERGO_TRAINING_BUDGET = recode_factor(ERGO_TRAINING_BUDGET, 'Y' = 'Y',
                                                                     'N' = 'N',
                                                                     "Don't know" = 'DK', .ordered= T)) %>% 
  

  mutate( ERGO_FEEDBACK = factor(ERGO_FEEDBACK),
          ERGO_FEEDBACK = recode_factor(ERGO_FEEDBACK, 'Never' = 'Never',
                                                       'Rarely' = 'Rarely',
                                                       'Sometimes' = 'Sometimes',
                                                        'Often' = 'Often', .ordered = T )) %>% 
            
  mutate( ERGO_FEEDBACK_BY_WHOM = factor(ERGO_FEEDBACK_BY_WHOM),
          ERGO_FEEDBACK_BY_WHOM = recode_factor(ERGO_FEEDBACK_BY_WHOM, 'I do not or rarely receive ergonomic feedback' = "Do not/rarely received feedback",
                                                                     'Mostly male endoscopy teachers' = 'Mostly male teachers',
                                                                     'Mostly female endoscopy teachers' = 'Mostly female teachers',
                                                                     'Both male and female endoscopy teachers equally' = 'Both equally' , .ordered = T)) %>% 
  
  
  mutate( ERGO_OPTIMIZATION = factor(ERGO_OPTIMIZATION),
          ERGO_OPTIMIZATION = recode_factor(ERGO_OPTIMIZATION, 'Y' = 'Y',
                                                                     'N' = 'N',
                                                                     "Don't know" = 'DK', .ordered= T)) %>% 
  
  mutate( GLOVE_SIZE_AVAILABLE = factor(GLOVE_SIZE_AVAILABLE)) %>% 
  
  mutate( DIAL_EXTENDERS_AVAILABLE = factor(DIAL_EXTENDERS_AVAILABLE),
          DIAL_EXTENDERS_AVAILABLE = recode_factor(DIAL_EXTENDERS_AVAILABLE, 'Y' = 'Y',
                                                                             'N' = 'N',
                                                                             "Don't know" = 'DK', .ordered= T)) %>% 
  
  mutate( DIAL_EXTENDERS_ENCOURAGED = factor(DIAL_EXTENDERS_ENCOURAGED),
          DIAL_EXTENDERS_ENCOURAGED = recode_factor(DIAL_EXTENDERS_ENCOURAGED, 'Y' = 'Y',
                                                                                 'N' = 'N',
                                                                                 "Don't use" = 'DU', .ordered= T)) %>% 
  
  

  mutate( DIAL_EXTENDERS_FEMALEATT = factor(DIAL_EXTENDERS_FEMALEATT),
          DIAL_EXTENDERS_FEMALEATT = recode_factor(DIAL_EXTENDERS_FEMALEATT, 'Not Likely' = 'Not Likely',
                                                                             'Somewhat Likely' = 'Somewhat Likely',
                                                                             'Sometimes' = 'Sometimes',
                                                                             'Verly Likely' = 'Very Likely',
                                                                             'NA' = 'NA', .ordered = T )) %>% 

  mutate( DIAL_EXTENDERS_MALEATT = factor(DIAL_EXTENDERS_MALEATT),
          DIAL_EXTENDERS_MALEATT = recode_factor(DIAL_EXTENDERS_MALEATT, 'Not Likely' = 'Not Likely',
                                                                             'Somewhat Likely' = 'Somewhat Likely',
                                                                             'Sometimes' = 'Sometimes',
                                                                             'Verly Likely' = 'Very Likely',
                                                                             'NA' = 'NA', .ordered = T ))  %>% 
    mutate( PEDI_COLONOSCOPES_AVAILABLE = factor(PEDI_COLONOSCOPES_AVAILABLE),
          PEDI_COLONOSCOPES_AVAILABLE = recode_factor(PEDI_COLONOSCOPES_AVAILABLE, 'Y' = 'Y',
                                                                                   'N' = 'N',
                                                                                   "Don't know" = 'DK', .ordered= T)) %>% 
  
   mutate( LEAD_APRONS_AVAILABLE = factor(LEAD_APRONS_AVAILABLE),
          LEAD_APRONS_AVAILABLE = recode_factor(LEAD_APRONS_AVAILABLE, 'N' = 'N',
                                                                       'Y' = 'Y',
                                                                       "Don't know" = "DK",.ordered= T)) %>% 
  
   mutate( TEACHER_SENSITIVITY_BY_GENDER = factor(TEACHER_SENSITIVITY_BY_GENDER),
          TEACHER_SENSITIVITY_BY_GENDER = recode_factor(TEACHER_SENSITIVITY_BY_GENDER, 'Male' = 'Male',
                                                           'Female' = 'Female',
                                                           'Both equally' = 'Both Equally',
                                                           'I have not had female endoscopy teachers' = 'Never had female teacher',
                                                           'Not sure' = 'Not Sure', .ordered= T ))  %>% 
  
   mutate( TACTILE_INSTRUCTION_FROM_MALES = factor(TACTILE_INSTRUCTION_FROM_MALES),
          TACTILE_INSTRUCTION_FROM_MALES = recode_factor(TACTILE_INSTRUCTION_FROM_MALES, 'No' = 'No',
                                                                                         'Yes, rarely' = 'Rarely',
                                                                                         'Yes, often' = 'Often', .ordered= T)) %>% 
  
   mutate( TACTILE_INSTRUCTION_FROM_FEMALES = factor(TACTILE_INSTRUCTION_FROM_FEMALES),
          TACTILE_INSTRUCTION_FROM_FEMALES = recode_factor(TACTILE_INSTRUCTION_FROM_FEMALES, 'No' = 'No',
                                                                                         'Yes, rarely' = 'Rarely',
                                                                                         'Yes, often' = 'Often', .ordered= T)) %>% 
  
   mutate( NURSES_ASKING = ifelse( ASK_NURSES_MORE == "Y", "More than Twice",
                           ifelse( ASK_NURSES_TWICE == "Y", "Twice",
                           ifelse( ASK_NURSES_ONCE == "Y", "Once", NA))),
           
           NURSES_ASKING = factor(NURSES_ASKING),
           NURSES_ASKING = recode_factor(NURSES_ASKING, "Once" = "Once",
                                                        "Twice" = "Twice",
                                                        "More than Twicce" = "More than Twice", .ordered=T),
  
           MALE_ATTENDINGS_ASKING = factor(MALE_ATTENDINGS_ASKING),
           MALE_ATTENDINGS_ASKING = recode_factor(MALE_ATTENDINGS_ASKING, "Once" = "Once",
                                                                          "Twice" = "Twice",
                                                                           "More than Twice" = "More than Twice", .ordered=T),

           FEMALE_ATTENDINGS_ASKING = factor(FEMALE_ATTENDINGS_ASKING),
           FEMALE_ATTENDINGS_ASKING = recode_factor(FEMALE_ATTENDINGS_ASKING, "Once" = "Once",
                                                                             "Twice" = "Twice",
                                                                             "More than twice" = "More than Twice",
                                                                             "Not applicable, I do not work with any female attendings" = "Don't work with FemAtt", .ordered=T)) %>% 
  
   mutate( ERGO_TRAINING_MANDATORY = factor(ERGO_TRAINING_MANDATORY),
           ERGO_TRAINING_MANDATORY = recode_factor(ERGO_TRAINING_MANDATORY, 'Y' = 'Y',
                                                                              'N' = 'N',
                                                                            "Don't know" = 'DK', .ordered= T) ,
    
           ERGO_OPTIMIZAITON_BUDGET_REQUIRED = factor(ERGO_OPTIMIZAITON_BUDGET_REQUIRED),
           ERGO_OPTIMIZAITON_BUDGET_REQUIRED = recode_factor(ERGO_OPTIMIZAITON_BUDGET_REQUIRED, 'Y' = 'Y',
                                                                              'N' = 'N',
                                                                            "Don't know" = 'DK', .ordered= T),
                 
                                                
          EXPERIENCE_IMPROVED_DIAL_EXTENDERS = factor(EXPERIENCE_IMPROVED_DIAL_EXTENDERS),
           EXPERIENCE_IMPROVED_DIAL_EXTENDERS = recode_factor(EXPERIENCE_IMPROVED_DIAL_EXTENDERS, 'Y' = 'Y',
                                                                              'N' = 'N',
                                                                            "Don't know" = 'DK', .ordered= T),
     
           EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES = factor(EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES),
           EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES = recode_factor(EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES, 'Y' = 'Y',
                                                                              'N' = 'N',
                                                                            "Don't know" = 'DK', .ordered= T) ,
     
           EXPERIENCE_IMPROVED_APRONS = factor(EXPERIENCE_IMPROVED_APRONS),
           EXPERIENCE_IMPROVED_APRONS = recode_factor(EXPERIENCE_IMPROVED_APRONS, 'Y' = 'Y',
                                                                              'N' = 'N',
                                                                            "Don't know" = 'DK', .ordered= T),
     
           ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED = factor(ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED),
           ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED = recode_factor(ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED, 'Y' = 'Y',
                                                                              'N' = 'N',
                                                                            "Don't know" = 'DK', .ordered= T))  %>% 
  
  
   mutate( ERGONOMIC_IMPORTANCE = factor(ERGONOMIC_IMPORTANCE),
           ERGONOMIC_IMPORTANCE = recode_factor(ERGONOMIC_IMPORTANCE, 'Both A and C' = 'Correct',
                                                                       .default = 'Incorrect', .ordered= T) ,
    
           MITIGATING_MUSCLE_STRAIN = factor(MITIGATING_MUSCLE_STRAIN),
           MITIGATING_MUSCLE_STRAIN = recode_factor(MITIGATING_MUSCLE_STRAIN, 'All of the above' = 'Correct',
                                                                       .default = 'Incorrect', .ordered= T) ,
    
           BED_POSITION = factor(BED_POSITION),
           BED_POSITION = recode_factor(BED_POSITION, '10 cm below elbow height' = 'Correct',
                                                                       .default = 'Incorrect', .ordered= T) ,
    
           ENDO_TRAINER_POSITION = factor(ENDO_TRAINER_POSITION),
           ENDO_TRAINER_POSITION = recode_factor(ENDO_TRAINER_POSITION, 'At the foot of the bed, on the same side of the trainee.' = 'Correct',
                                                                       .default = 'Incorrect', .ordered= T) ,
    
           WHEN_DISABILITY_INSURANCE = factor(WHEN_DISABILITY_INSURANCE),
           WHEN_DISABILITY_INSURANCE = recode_factor(WHEN_DISABILITY_INSURANCE, 'During training' = 'Correct',
                                                                       .default = 'Incorrect', .ordered= T) ) %>% 


# Add a Sex-Adjusted Height Cat based on Median Height bands for each Sex
   
     group_by( BIRTHSEX ) %>% 
  
  mutate(HEIGHT3 = case_when( is.na(HEIGHT2) ~ NA_real_,
                                  HEIGHT2 == "< 5'"    ~ 1,
                                  HEIGHT2 == "5-5'3"   ~ 2,
                                  HEIGHT2 == "5'4-5'6" ~ 3,
                                  HEIGHT2 == "5'7-5'9" ~ 4,
                                  HEIGHT2 == "5'10-6'" ~ 5,
                                  HEIGHT2 == "6'1-6'4" ~ 6,
                                  TRUE ~ 7),

          MEDIAN_HT = median(HEIGHT3, na.rm=T),
          HTCATSEX = case_when ( is.na(MEDIAN_HT) ~ NA_real_,
                                 HEIGHT3 < MEDIAN_HT ~ 1,
                                 HEIGHT3 == MEDIAN_HT ~ 2,
                                 TRUE ~ 3),
         
         HTCATSEX =   recode_factor(factor(HTCATSEX), '1'= 'SHORT', '2'= "AVG", '3'= "TALL", .ordered=F),
         HTCATSEX =   relevel(HTCATSEX, ref= "TALL")
         
         ) %>% ungroup() %>% select ( -HEIGHT3)


Here’s a glimpse of the structure of the resulting dataset SURVEY:

glimpse(SURVEY)
## Rows: 236
## Columns: 96
## $ RECORD_ID                                 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 1…
## $ BIRTHSEX                                  <fct> F, F, F, M, F, F, F, F, F, M…
## $ GENDER                                    <fct> WOMAN, WOMAN, WOMAN, MAN, WO…
## $ RACE_SOUTHASIAN                           <chr> "N", "N", "N", "Y", "N", "N"…
## $ RACE_EASTASIAN                            <chr> "N", "N", "N", "N", "Y", "Y"…
## $ RACE_WHITE                                <chr> "N", "Y", "Y", "N", "N", "N"…
## $ RACE_BLACK                                <chr> "N", "N", "N", "N", "N", "N"…
## $ RACE_HISPANIC                             <chr> "Y", "N", "N", "N", "N", "N"…
## $ RACE_NATIVEAMER                           <chr> "N", "N", "N", "N", "N", "N"…
## $ RACE_PACIFICISLAND                        <chr> "N", "N", "N", "N", "N", "N"…
## $ RACE_OTHER                                <chr> "N", "N", "N", "N", "N", "N"…
## $ AGE                                       <chr> "30-34", "30-34", "30-34", "…
## $ TRAINING_LEVEL                            <ord> Third Year, Third Year, Firs…
## $ HEIGHT                                    <chr> "5'4-5'6", "5'4-5'6", "5'4-5…
## $ GLOVE                                     <dbl> 6.5, 6.5, 6.0, 7.0, 6.5, 5.5…
## $ GLOVE_SIZE_AVAILABLE                      <fct> Y, Y, Y, Y, N, N, Y, Y, N, Y…
## $ PERFORMANCE_HOURS                         <fct> 10-20, < 10, 10-20, 31-40, 1…
## $ TEACHER_GENDER_PREFERENCE                 <fct> No, No, Yes, No, No, No, No,…
## $ FEMALE_TRAINERS                           <fct> None, 6-10, 6-10, 6-10, 6-10…
## $ MALE_TRAINERS                             <fct> 6-10, > 10, > 10, > 10, > 10…
## $ EVER_INJURED                              <fct> N, N, N, N, Y, N, N, N, N, N…
## $ EXPERIENCED_TRANSIENT_PAIN_NO             <fct> Y, N, N, N, N, N, N, N, N, N…
## $ EXPERIENCED_TRANSIENT_PAIN_HAND           <fct> N, Y, Y, Y, Y, Y, Y, N, N, Y…
## $ EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER  <fct> N, Y, Y, Y, Y, N, Y, Y, Y, Y…
## $ EXPERIENCED_TRANSIENT_PAIN_BACK           <fct> N, Y, Y, Y, Y, N, Y, N, N, Y…
## $ EXPERIENCED_TRANSIENT_PAIN_LEG            <fct> N, N, Y, Y, N, N, N, N, N, N…
## $ EXPERIENCED_TRANSIENT_PAIN_FOOT           <fct> N, N, Y, Y, N, Y, N, N, N, N…
## $ GROWING_PAINS                             <fct> NA, Y, Y, Y, Y, N, N, Y, N, …
## $ FELLOWSHIP_PREGNANCY                      <fct> N, Y, N, NA, N, N, N, N, N, …
## $ PREGNANCY_ERGO_DIFFICULTY                 <fct> NA, Y, NA, NA, NA, NA, NA, N…
## $ PREGNANCY_ERGO_INJURY                     <fct> NA, N, NA, NA, NA, NA, NA, N…
## $ POSTPARTUM_ERGO_INJURY                    <fct> NA, N, NA, NA, NA, NA, NA, N…
## $ FELLOWSHIP_FORMAL_ERGO_TRAINING           <fct> N, N, N, N, N, N, N, Y, N, Y…
## $ INFORMAL_TRAINING                         <fct> Y, Y, Y, Y, Y, Y, N, Y, Y, Y…
## $ TRAINING_TECHNIQUES_POSTURAL              <fct> Y, N, Y, Y, N, Y, Y, Y, N, Y…
## $ TRAINING_TECHNIQUES_BEDHEIGHT             <fct> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y…
## $ TRAINING_TECHNIQUES_BEDANGLE              <fct> Y, N, Y, Y, N, Y, Y, N, Y, Y…
## $ TRAINING_TECHNIQUES_MONITORHEIGHT         <fct> Y, N, N, Y, N, Y, Y, N, Y, Y…
## $ TRAINING_TECHNIQUES_MUSCULOSKELETAL       <fct> Y, N, N, Y, N, N, N, Y, Y, N…
## $ TRAINING_TECHNIQUES_EXERCISE_STRETCHING   <fct> N, N, N, N, N, N, N, N, N, N…
## $ TRAINING_TECHNIQUES_DIAL_EXTENDERS        <fct> N, N, Y, N, Y, N, Y, N, N, N…
## $ TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE <fct> Y, N, Y, Y, Y, N, Y, Y, Y, N…
## $ ERGO_TRAINING_BUDGET                      <ord> DK, N, DK, DK, N, DK, DK, N,…
## $ ERGO_FEEDBACK                             <ord> Sometimes, Rarely, Sometimes…
## $ ERGO_FEEDBACK_BY_WHOM                     <ord> Mostly male teachers, Mostly…
## $ ERGO_OPTIMIZATION                         <ord> DK, N, N, Y, N, N, Y, DK, N,…
## $ DIAL_EXTENDERS_AVAILABLE                  <ord> DK, N, Y, Y, Y, N, Y, DK, N,…
## $ DIAL_EXTENDERS_ENCOURAGED                 <ord> DU, N, Y, Y, Y, DU, Y, NA, N…
## $ DIAL_EXTENDERS_FEMALEATT                  <ord> NA, NA, Not likely, NA, Very…
## $ DIAL_EXTENDERS_MALEATT                    <ord> NA, NA, Very likely, NA, Ver…
## $ PEDI_COLONOSCOPES_AVAILABLE               <ord> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y…
## $ LEAD_APRONS_AVAILABLE                     <ord> Y, Y, DK, Y, DK, N, DK, Y, N…
## $ LEAD_APRONS_LW_ONEPIECE                   <chr> "N", "N", "N", "Y", "N", "Y"…
## $ LEAD_APRONS_LW_TWOPIECE                   <chr> "Y", "N", "N", "Y", "N", "Y"…
## $ LEAD_APRONS_STANDARD_ONEPIECE             <chr> "N", "Y", "N", "Y", "N", "Y"…
## $ LEAD_APRONS_STANDARD_TWOPIECE             <chr> "N", "Y", "N", "Y", "N", "Y"…
## $ LEAD_APRONS_DOUBLE                        <chr> "N", "N", "N", "N", "N", "N"…
## $ LEAD_APRONS_THYROID                       <chr> "N", "N", "N", "Y", "N", "N"…
## $ LEAD_APRONS_MATERNALDOS                   <chr> "N", "N", "N", "N", "N", "N"…
## $ LEAD_APRONS_FETALDOS                      <chr> "N", "N", "N", "N", "N", "N"…
## $ ERGO_FORMAL_TIMEOUT_PRIOR                 <chr> "N", "N", "N", "N", "N", "N"…
## $ ERGO_INFORMAL_TIMEOUT_PRIOR               <chr> "Y", "N", "Y", "Y", "Y", "Y"…
## $ MONITORS_ADJUSTABLE                       <chr> "Y", "N", "N", "Y", "N", "Y"…
## $ TEACHER_SENSITIVITY_STATURE_HANDSIZE      <chr> "Y", "N", "N", "Y", "N", "N"…
## $ TEACHER_SENSITIVITY_BY_GENDER             <ord> Never had female teacher, No…
## $ TACTILE_INSTRUCTION_FROM_MALES            <ord> Often, No, No, No, No, No, O…
## $ TACTILE_INSTRUCTION_FROM_FEMALES          <ord> No, No, No, Rarely, Rarely, …
## $ COMFORTABLE_ASKING_NURSES                 <chr> "Y", "Y", "Y", "Y", "Y", "Y"…
## $ ASK_NURSES_ONCE                           <chr> "Y", "Y", "N", "N", "N", "Y"…
## $ ASK_NURSES_TWICE                          <chr> "N", "N", "Y", "N", "N", "N"…
## $ ASK_NURSES_MORE                           <chr> "N", "N", "N", "Y", "Y", "N"…
## $ COMFORTABLE_ASKING_TECHS                  <chr> "Y", "Y", "Y", "Y", "Y", "Y"…
## $ MALE_ATTENDINGS_ASKING                    <ord> Once, NA, More than twice, M…
## $ FEMALE_ATTENDINGS_ASKING                  <ord> Don't work with FemAtt, NA, …
## $ RECOGNIZED_RESPECTED_ES_STAFF             <chr> "Y", "Y", "Y", "Y", "Y", "Y"…
## $ RECOGNIZED_RESPECTED_ANESTHETISTS         <chr> "Y", "Y", "Y", "Y", "Y", "N"…
## $ RECOGNIZED_RESPECTED_GASTRO_ATTENDING     <chr> "Y", "Y", "Y", "Y", "Y", "Y"…
## $ FIRST_NAME_NO_PERMISSION                  <chr> "N", "Y", "Y", "N", "N", "Y"…
## $ ERGO_TRAINING_MANDATORY                   <ord> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y…
## $ ERGO_OPTIMIZAITON_BUDGET_REQUIRED         <ord> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y…
## $ EXPERIENCE_IMPROVED_DIAL_EXTENDERS        <ord> DK, DK, Y, N, Y, Y, Y, DK, Y…
## $ EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES     <ord> DK, N, Y, N, Y, Y, Y, N, Y, …
## $ EXPERIENCE_IMPROVED_APRONS                <ord> N, Y, Y, N, Y, Y, Y, DK, Y, …
## $ ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED    <ord> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y…
## $ ERGONOMIC_IMPORTANCE                      <ord> Incorrect, Correct, Correct,…
## $ MITIGATING_MUSCLE_STRAIN                  <ord> Incorrect, Correct, Correct,…
## $ BED_POSITION                              <ord> Incorrect, Incorrect, Correc…
## $ ENDO_TRAINER_POSITION                     <ord> Incorrect, Correct, Incorrec…
## $ WHEN_DISABILITY_INSURANCE                 <ord> Incorrect, Correct, Correct,…
## $ AGE2                                      <fct> 30-34, 30-34, 30-34, 30-34, …
## $ RACE                                      <fct> HISPANIC, WHITE, WHITE, ASIA…
## $ RACE2                                     <fct> NON-WHITE, WHITE, WHITE, NON…
## $ HEIGHT2                                   <fct> 5'4-5'6, 5'4-5'6, 5'4-5'6, 6…
## $ NURSES_ASKING                             <ord> Once, Once, Twice, More than…
## $ MEDIAN_HT                                 <dbl> 3, 3, 3, 5, 3, 3, 3, 3, 3, 5…
## $ HTCATSEX                                  <fct> AVG, AVG, AVG, TALL, TALL, S…



Do we have a large enough sample to adequate “power” our analysis?


A glimpse of our scrubbed data shows that we have 236 observations, one for each respondent to our survey. Is this sample size large enough to power the analyses that we wish to conduct? It might be helpful to establish that now, especially considering that some observations may be dropped (due to missingness, skew, or the subject having provided an answer that slipped through any validation guardrails that were established beforehand).

Since most of our analyses will be done using the Chi-Square statistic, let’s start there. Below are the results of power calculations for a “medium effect size” (0.3), based off Cohen’s effect size calculation. We use this 0.3 threshold to test how many subjects would be required for 90% power (80% is the standard threshold, but we want to be a bit more aggressive here) based on four Chi-Square Scenarios:

  • A 2 x 2 Chi-Square (one degree of freedom): e.g., BIRTHSEX x a two-level categorical variable (Y/N)
  • A 2 x 3 Chi-Square (two degrees of freedom): e.g., BIRTHSEX x a three-level categorical variable (Y/N/Don’t Know)
  • A 2 x 4 Chi-Square (three degress of freedom): e.g., BIRTHSEX x a four-level categorical variable (Training Level)
  • A 2 x 6 Chi-Square (five degrees of freedom): e.g., BIRTHSEX x a six-level categorical variable (Race)



require(effsize)
## Loading required package: effsize
## 
## Attaching package: 'effsize'
## The following object is masked from 'package:psych':
## 
##     cohen.d
require(pwr)
## Loading required package: pwr
cohen.ES(test = "chisq", size = "medium")
## 
##      Conventional effect size from Cohen (1982) 
## 
##            test = chisq
##            size = medium
##     effect.size = 0.3
chi2<- plot(pwr.chisq.test(w=0.3, N=NULL, df=1, sig.level=0.05, power=0.9))+
  set_theme(base= theme_apa()) +
  geom_hline(yintercept= .80, linetype='solid', col = 'red', alpha=0.5)+
  geom_hline(yintercept= .90, linetype='dotted', col = 'darkblue', size=1)+
  ggtitle("Chi-Square Power Calculation - 2x2 Table")
  

chi3<- plot(pwr.chisq.test(w=0.3, N=NULL, df=2, sig.level=0.05, power=0.9))+
  set_theme(base= theme_apa()) +
  geom_hline(yintercept= .80, linetype='solid', col = 'red', alpha=0.5)+
  geom_hline(yintercept= .90, linetype='dotted', col = 'darkblue', size=1)+
    ggtitle("Chi-Square Power Calculation - 2x3 Table")
  
chi4<- plot(pwr.chisq.test(w=0.3, N=NULL, df=3, sig.level=0.05, power=0.9))+
          set_theme(base= theme_apa()) +
  geom_hline(yintercept= .80, linetype='solid', col = 'red', alpha=0.5)+
  geom_hline(yintercept= .90, linetype='dotted', col = 'darkblue', size=1)+
         ggtitle("Chi-Square Power Calculation - 2x4 Table")

chi6<- plot(pwr.chisq.test(w=0.3, N=NULL, df=5, sig.level=0.05, power=0.9))+
  set_theme(base= theme_apa()) +
  geom_hline(yintercept= .80, linetype='solid', col = 'red', alpha=0.5)+
  geom_hline(yintercept= .90, linetype='dotted', col = 'darkblue', size=1)+
  ggtitle("Chi-Square Power Calculation - 2x6 Table")

wrap_plots(chi2,chi3,chi4, chi6)


Based off these calculations, the maximum number of subjects required to achieve a “moderate effect size” with 90% power in the types of Chi-Square analyses we aim to perform is 183 – and we have approximately 23% more subjects than that. So we’re good to go as far as Chi-Squares are concerned.

Since what constitutes a “moderate effect size” can change depending on the type of statistical test being performed, below are the results of power calculatiosn for a two-tailed T-test, ANOVA (four-level categorical variable), Pearson correlations, and the F-test from multivariate regression analyses:


require(effsize)
require(pwr)


#T-Test
cohen.ES(test = "t", size="medium")
## 
##      Conventional effect size from Cohen (1982) 
## 
##            test = t
##            size = medium
##     effect.size = 0.5
tpower <- plot(pwr.t.test(n=NULL, d=0.5, sig.level=0.05, power=0.9, type="two.sample"))+
  set_theme(base= theme_apa()) +
  geom_hline(yintercept= .80, linetype='solid', col = 'red', alpha=0.5)+
  geom_hline(yintercept= .90, linetype='dotted', col = 'darkblue', size=1)+
  ggtitle("Two-Sampe T-Test Power Calculation")


#ANOVA - 4 group assumption
cohen.ES(test= "anov", size="medium")
## 
##      Conventional effect size from Cohen (1982) 
## 
##            test = anov
##            size = medium
##     effect.size = 0.25
apower<- plot(pwr.anova.test( k=4, n=NULL, f=0.25, sig.level=.05,power=.9))+
  set_theme(base= theme_apa()) +
  geom_hline(yintercept= .80, linetype='solid', col = 'red', alpha=0.5)+
  geom_hline(yintercept= .90, linetype='dotted', col = 'darkblue', size=1)+
  ggtitle("One-Way ANOVA Power Calculation (4-level variable)")

cohen.ES(test= "r", size="medium")
## 
##      Conventional effect size from Cohen (1982) 
## 
##            test = r
##            size = medium
##     effect.size = 0.3
rpower <-plot(pwr.r.test(n=NULL, r=0.3, sig.level=0.5, power=0.9))+
  set_theme(base= theme_apa()) +
  geom_hline(yintercept= .80, linetype='solid', col = 'red', alpha=0.5)+
  geom_hline(yintercept= .90, linetype='dotted', col = 'darkblue', size=1)+
  ggtitle("Correlation Power Calculation (arctangh transformation")

wrap_plots( tpower, apower, rpower)

cohen.ES(test= "f2", size="medium")
## 
##      Conventional effect size from Cohen (1982) 
## 
##            test = f2
##            size = medium
##     effect.size = 0.15
pwr.f2.test(u=8, v=NULL, f2= .15, sig.level=0.05, power=0.9)
## 
##      Multiple regression power calculation 
## 
##               u = 8
##               v = 126.1791
##              f2 = 0.15
##       sig.level = 0.05
##           power = 0.9


Once again, based off these calculations, our sample size of 236 seems to be more than adequate to perform these types of analyses and to adequately test for a “medium effect,” if a significant result is found.

(Note that the results for multivariate regression could not be graphed, but according to the cohen.ES function from the effsize package, a regression with eight covariates (not likely, but assumed here for conservatism), requires about 126 subjects to achieve a “medium effect.”





DEMOGRAPHIC DATA


Question 1: Distribution of RACE x AGE


#BIRTHSEX Female/Male Totals

sqldf("select BIRTHSEX,
              count(RECORD_ID) as N
       from SURVEY 
       where BIRTHSEX in ('F','M') 
       group by 1")
##   BIRTHSEX   N
## 1        F 113
## 2        M 123
#Chi-Square Test of Proportions

fcount <- length(SURVEY$BIRTHSEX[SURVEY$BIRTHSEX =="F"])
mcount <- length(SURVEY$BIRTHSEX[SURVEY$BIRTHSEX =="M"])
fcount
## [1] 113
mcount
## [1] 123
females_males = c(fcount, mcount )
chisq.test(females_males, p= c(1/2, 1/2))
## 
##  Chi-squared test for given probabilities
## 
## data:  females_males
## X-squared = 0.42373, df = 1, p-value = 0.5151
#GENDER Woman/Man/Other Totals

sqldf("select GENDER,
              count(RECORD_ID) as N
       from SURVEY 
       where BIRTHSEX in ('F','M') 
       group by 1")
##   GENDER   N
## 1    MAN 123
## 2  OTHER   1
## 3  WOMAN 112
#Chi-Square Test of Proportions

womancount <- length(SURVEY$GENDER[SURVEY$GENDER =="WOMAN"])
mancount <- length(SURVEY$GENDER[SURVEY$GENDER =="MAN"])
othcount <- length(SURVEY$GENDER[SURVEY$GENDER =="OTHER"])
womancount
## [1] 112
mancount
## [1] 123
othcount
## [1] 1
gender_spread = c(womancount, mancount, othcount )
chisq.test(gender_spread, p= c(1/3, 1/3, 1/3))
## 
##  Chi-squared test for given probabilities
## 
## data:  gender_spread
## X-squared = 115.79, df = 2, p-value < 2.2e-16
#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$AGE2, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Age Distribution by Birth Sex",
          axis.titles = c('Respondents Age Bands'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET2 <-
SURVEY %>% 
  filter( !is.na(AGE2)) %>% 
  mutate(BIRTHSEX = recode_factor( BIRTHSEX, "M" = "MALES", "F" = "FEMALES", .ordered=T))


ggplot(  SUBSET2, aes(x= AGE2)) + facet_grid( SUBSET2$BIRTHSEX ) +  
  geom_bar(aes(fill= BIRTHSEX) ) +
  stat_count(geom="text", aes(label=..count..), vjust= -.1) + 
  scale_fill_manual( values = c( "MALES"="darkgrey", "FEMALES"="#006cc5"), guide = "none" )+
  theme_538()  +
  xlab("Age Bands")+ ylab("Counts") 

#Alternative View, if Birth Sex by Age is desired 


CrossTable(SURVEY$AGE, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##              | SURVEY$BIRTHSEX 
##   SURVEY$AGE |         F |         M | Row Total | 
## -------------|-----------|-----------|-----------|
##         < 30 |        10 |         8 |        18 | 
##              |     0.556 |     0.444 |     0.076 | 
##              |     0.088 |     0.065 |           | 
## -------------|-----------|-----------|-----------|
##        30-34 |        93 |        90 |       183 | 
##              |     0.508 |     0.492 |     0.775 | 
##              |     0.823 |     0.732 |           | 
## -------------|-----------|-----------|-----------|
##        35-40 |        10 |        24 |        34 | 
##              |     0.294 |     0.706 |     0.144 | 
##              |     0.088 |     0.195 |           | 
## -------------|-----------|-----------|-----------|
##        41-50 |         0 |         1 |         1 | 
##              |     0.000 |     1.000 |     0.004 | 
##              |     0.000 |     0.008 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       113 |       123 |       236 | 
##              |     0.479 |     0.521 |           | 
## -------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  6.624273     d.f. =  3     p =  0.08488823 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.05670541 
## 
## 



Question 2: Distribution of RACE x BIRTHSEX


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$RACE, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Race Distribution by Birth Sex",
          axis.titles = c('Race Categories '), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET2 <-
SURVEY %>% 
  filter( !is.na(RACE)) %>% 
  mutate(BIRTHSEX = recode_factor( BIRTHSEX, "M" = "MALES", "F" = "FEMALES", .ordered=T))


ggplot(  SUBSET2, aes(x= RACE)) + facet_grid( SUBSET2$BIRTHSEX ) +  
  geom_bar(aes(fill= BIRTHSEX) ) +
  stat_count(geom="text", aes(label=..count..), vjust= -.05) + 
  scale_fill_manual( values = c( "MALES"="darkgrey", "FEMALES"="#006cc5"), guide = "none" )+
  theme_538()  +
  xlab("Race/Ethnicity Categories")+ ylab("Counts") 

#Alternative View, if Birth Sex by Race is desired 


CrossTable(SURVEY$RACE, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r = T, prop.t=F, chisq=T, fisher=F)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##              | SURVEY$BIRTHSEX 
##  SURVEY$RACE |         F |         M | Row Total | 
## -------------|-----------|-----------|-----------|
##   ASIAN EAST |        24 |        13 |        37 | 
##              |     0.649 |     0.351 |     0.157 | 
##              |     0.212 |     0.106 |           | 
## -------------|-----------|-----------|-----------|
##  ASIAN SOUTH |        33 |        23 |        56 | 
##              |     0.589 |     0.411 |     0.237 | 
##              |     0.292 |     0.187 |           | 
## -------------|-----------|-----------|-----------|
##        BLACK |         5 |         6 |        11 | 
##              |     0.455 |     0.545 |     0.047 | 
##              |     0.044 |     0.049 |           | 
## -------------|-----------|-----------|-----------|
##     HISPANIC |         9 |         6 |        15 | 
##              |     0.600 |     0.400 |     0.064 | 
##              |     0.080 |     0.049 |           | 
## -------------|-----------|-----------|-----------|
##        WHITE |        36 |        68 |       104 | 
##              |     0.346 |     0.654 |     0.441 | 
##              |     0.319 |     0.553 |           | 
## -------------|-----------|-----------|-----------|
##        OTHER |         6 |         7 |        13 | 
##              |     0.462 |     0.538 |     0.055 | 
##              |     0.053 |     0.057 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       113 |       123 |       236 | 
##              |     0.479 |     0.521 |           | 
## -------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  15.27367     d.f. =  5     p =  0.009254827 
## 
## 
## 



Question 3: Distribution of TRAINING LEVEL x BIRTHSEX


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$TRAINING_LEVEL, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training Levels (4) by Birth Sex",
          axis.titles = c('Training Levels'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Training Level is desired 


CrossTable(SURVEY$TRAINING_LEVEL, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=F)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  235 
## 
##  
##                       | SURVEY$BIRTHSEX 
## SURVEY$TRAINING_LEVEL |         F |         M | Row Total | 
## ----------------------|-----------|-----------|-----------|
##            First Year |        43 |        36 |        79 | 
##                       |     0.544 |     0.456 |     0.336 | 
##                       |     0.381 |     0.295 |           | 
## ----------------------|-----------|-----------|-----------|
##           Second Year |        41 |        36 |        77 | 
##                       |     0.532 |     0.468 |     0.328 | 
##                       |     0.363 |     0.295 |           | 
## ----------------------|-----------|-----------|-----------|
##            Third Year |        23 |        42 |        65 | 
##                       |     0.354 |     0.646 |     0.277 | 
##                       |     0.204 |     0.344 |           | 
## ----------------------|-----------|-----------|-----------|
##              Advanced |         6 |         8 |        14 | 
##                       |     0.429 |     0.571 |     0.060 | 
##                       |     0.053 |     0.066 |           | 
## ----------------------|-----------|-----------|-----------|
##          Column Total |       113 |       122 |       235 | 
##                       |     0.481 |     0.519 |           | 
## ----------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  6.449267     d.f. =  3     p =  0.09168489 
## 
## 
## 
#If it is decided to combine the Third Year/Advanced Levels, then use these tables:


RECODE <-
 SURVEY %>% 
 select( TRAINING_LEVEL, BIRTHSEX) %>% 
 mutate( TRAINING_LEVEL = recode_factor( TRAINING_LEVEL, 'First Year'= 'First Year', 
                                                            'Second Year'= 'Second Year', 
                                                            'Third Year' = 'Advanced', 
                                                            'Advanced' = "Advanced", .ordered = T) ) 

        
      plot_xtab(RECODE$TRAINING_LEVEL, RECODE$BIRTHSEX, margin = "row", 
                bar.pos = "stack", coord.flip = TRUE, 
                title = "Training Levels (3) by Birth Sex",
                axis.titles = c('Training Levels'), 
                legend.title= "Birth Sex",
                geom.colors = c("#006cc5","#cbcccb"), 
                show.summary = TRUE )+ 
        set_theme(base= theme_classic())

      #Alternative View, if Birth Sex by Training Level is desired 
      
      
      CrossTable(RECODE$TRAINING_LEVEL, RECODE$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=F)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  235 
## 
##  
##                       | RECODE$BIRTHSEX 
## RECODE$TRAINING_LEVEL |         F |         M | Row Total | 
## ----------------------|-----------|-----------|-----------|
##            First Year |        43 |        36 |        79 | 
##                       |     0.544 |     0.456 |     0.336 | 
##                       |     0.381 |     0.295 |           | 
## ----------------------|-----------|-----------|-----------|
##           Second Year |        41 |        36 |        77 | 
##                       |     0.532 |     0.468 |     0.328 | 
##                       |     0.363 |     0.295 |           | 
## ----------------------|-----------|-----------|-----------|
##              Advanced |        29 |        50 |        79 | 
##                       |     0.367 |     0.633 |     0.336 | 
##                       |     0.257 |     0.410 |           | 
## ----------------------|-----------|-----------|-----------|
##          Column Total |       113 |       122 |       235 | 
##                       |     0.481 |     0.519 |           | 
## ----------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  6.191608     d.f. =  2     p =  0.04523864 
## 
## 
## 
# Reset Training Level back to three levels 


SURVEY %<>% 
  
mutate( TRAINING_LEVEL = as.character(TRAINING_LEVEL),
        TRAINING_LEVEL = factor (TRAINING_LEVEL, levels= c('First Year','Second Year', 'Third Year', 'Advanced')),
        TRAINING_LEVEL = recode_factor( TRAINING_LEVEL, 'First Year'= 'First Year', 
                                                            'Second Year'= 'Second Year', 
                                                            'Third Year' = 'Advanced', 
                                                            'Advanced' = "Advanced", .ordered = T) )



Question 4: Distribution of HEIGHT2 x BIRTHSEX


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$HEIGHT2, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Height Bands by Birth Sex",
          axis.titles = c('Height Bands'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET2 <-
SURVEY %>% 
  filter( !is.na(HEIGHT2)) %>% 
  mutate(BIRTHSEX = recode_factor( BIRTHSEX, "M" = "MALES", "F" = "FEMALES", .ordered=T))


ggplot(  SUBSET2, aes(x= HEIGHT2)) + facet_grid( SUBSET2$BIRTHSEX ) +  
  geom_bar(aes(fill= BIRTHSEX) ) +
  stat_count(geom="text", aes(label=..count..), vjust= -.3) + 
  scale_fill_manual( values = c( "MALES"="darkgrey", "FEMALES"="#006cc5"), guide = "none" )+
  theme_538()  +
  xlab("Height Bands")+ ylab("Counts") 

#Alternative View, if Birth Sex by Height is desired 

CrossTable(SURVEY$HEIGHT2, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r = F, prop.t=F, chisq=T, fisher=F)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  235 
## 
##  
##                | SURVEY$BIRTHSEX 
## SURVEY$HEIGHT2 |         F |         M | Row Total | 
## ---------------|-----------|-----------|-----------|
##           < 5' |         2 |         0 |         2 | 
##                |     0.018 |     0.000 |           | 
## ---------------|-----------|-----------|-----------|
##          5-5'3 |        38 |         2 |        40 | 
##                |     0.339 |     0.016 |           | 
## ---------------|-----------|-----------|-----------|
##        5'4-5'6 |        46 |        10 |        56 | 
##                |     0.411 |     0.081 |           | 
## ---------------|-----------|-----------|-----------|
##        5'7-5'9 |        25 |        41 |        66 | 
##                |     0.223 |     0.333 |           | 
## ---------------|-----------|-----------|-----------|
##        5'10-6' |         1 |        44 |        45 | 
##                |     0.009 |     0.358 |           | 
## ---------------|-----------|-----------|-----------|
##        6'1-6'4 |         0 |        24 |        24 | 
##                |     0.000 |     0.195 |           | 
## ---------------|-----------|-----------|-----------|
##          > 6'4 |         0 |         2 |         2 | 
##                |     0.000 |     0.016 |           | 
## ---------------|-----------|-----------|-----------|
##   Column Total |       112 |       123 |       235 | 
##                |     0.477 |     0.523 |           | 
## ---------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  128.2767     d.f. =  6     p =  2.963565e-25 
## 
## 
## 
#Same Data, but using Sex-Adjusetd Height Bands (Short, Average, Tall)

SUBSET2 <-
SURVEY %>% 
  filter( !is.na(HTCATSEX)) %>% 
  mutate(BIRTHSEX = recode_factor( BIRTHSEX, "M" = "MALES", "F" = "FEMALES", .ordered=T)) %>% select( BIRTHSEX, HTCATSEX, HEIGHT)



ggplot(  SUBSET2, aes(x= HTCATSEX)) + facet_grid( SUBSET2$BIRTHSEX ) +  
  geom_bar(aes(fill= BIRTHSEX) ) +
  stat_count(geom="text", aes(label=..count..), vjust= -.3) + 
  scale_fill_manual( values = c( "MALES"="darkgrey", "FEMALES"="#006cc5"), guide = "none" )+
  scale_x_discrete( limits = c('SHORT', 'AVG', 'TALL'))+
  theme_538()  +
  xlab(paste0("Sex-Adjusted Height Bands","\n","\n","(Female AVG = Band 3, 5'4-5'6  * Male AVG = Band 5, 5'10-6')"))+ ylab("Counts") 

#Alternative View, if Birth Sex by Height is desired 

CrossTable(SURVEY$HTCATSEX  , SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r = F, prop.t=F, chisq=T, fisher=F)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##                 | SURVEY$BIRTHSEX 
## SURVEY$HTCATSEX |         F |         M | Row Total | 
## ----------------|-----------|-----------|-----------|
##            TALL |        27 |        26 |        53 | 
##                 |     0.239 |     0.211 |           | 
## ----------------|-----------|-----------|-----------|
##           SHORT |        40 |        53 |        93 | 
##                 |     0.354 |     0.431 |           | 
## ----------------|-----------|-----------|-----------|
##             AVG |        46 |        44 |        90 | 
##                 |     0.407 |     0.358 |           | 
## ----------------|-----------|-----------|-----------|
##    Column Total |       113 |       123 |       236 | 
##                 |     0.479 |     0.521 |           | 
## ----------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  1.459408     d.f. =  2     p =  0.4820516 
## 
## 
## 



Question 5: Distribution of GLOVE SIZE x BIRTHSEX


require(ggstatsplot)

#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$GLOVE, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Glove Size by Birth Sex",
          axis.titles = c('Glove Sizes'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Glove Size is desired 


CrossTable(SURVEY$GLOVE, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  231 
## 
##  
##              | SURVEY$BIRTHSEX 
## SURVEY$GLOVE |         F |         M | Row Total | 
## -------------|-----------|-----------|-----------|
##            5 |         2 |         0 |         2 | 
##              |     1.000 |     0.000 |     0.009 | 
##              |     0.018 |     0.000 |           | 
## -------------|-----------|-----------|-----------|
##          5.5 |         7 |         0 |         7 | 
##              |     1.000 |     0.000 |     0.030 | 
##              |     0.064 |     0.000 |           | 
## -------------|-----------|-----------|-----------|
##            6 |        29 |         0 |        29 | 
##              |     1.000 |     0.000 |     0.126 | 
##              |     0.264 |     0.000 |           | 
## -------------|-----------|-----------|-----------|
##          6.5 |        55 |         6 |        61 | 
##              |     0.902 |     0.098 |     0.264 | 
##              |     0.500 |     0.050 |           | 
## -------------|-----------|-----------|-----------|
##            7 |        15 |        36 |        51 | 
##              |     0.294 |     0.706 |     0.221 | 
##              |     0.136 |     0.298 |           | 
## -------------|-----------|-----------|-----------|
##          7.5 |         2 |        60 |        62 | 
##              |     0.032 |     0.968 |     0.268 | 
##              |     0.018 |     0.496 |           | 
## -------------|-----------|-----------|-----------|
##            8 |         0 |        16 |        16 | 
##              |     0.000 |     1.000 |     0.069 | 
##              |     0.000 |     0.132 |           | 
## -------------|-----------|-----------|-----------|
##          8.5 |         0 |         3 |         3 | 
##              |     0.000 |     1.000 |     0.013 | 
##              |     0.000 |     0.025 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       110 |       121 |       231 | 
##              |     0.476 |     0.524 |           | 
## -------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  159.1027     d.f. =  7     p =  4.954612e-31 
## 
## 
## 
#Median Glove Size - Sex Difference


  SURVEY %>% 
  group_by( BIRTHSEX) %>% 
  summarize( GLOVE_MEDIAN = median(GLOVE, na.rm=T))
## # A tibble: 2 × 2
##   BIRTHSEX GLOVE_MEDIAN
##   <fct>           <dbl>
## 1 F                 6.5
## 2 M                 7.5
ggbetweenstats( data= SURVEY,
                x = BIRTHSEX,
                y = GLOVE,
                type="nonparametric",
                p.adjust.method = "none")

  SURVEY %>% 
      filter( !is.na(GLOVE) ) %>% 
  ggplot(aes( x=GLOVE, y= stat(density), fill= BIRTHSEX))+
  geom_density( alpha=0.5, position = "identity" ) + 
  scale_x_continuous(breaks = scales::breaks_width(0.5) ) +
    scale_y_continuous(breaks = scales::breaks_width(.25))+
    scale_fill_manual(values = c("red","darkgreen"), name ="Birth Sex", labels = c("Female","Male"))+
  xlab("Glove Size")+ ylab("Density")+
  ggtitle("Glove Size Density Curve by Sex",  subtitle = "Shows F/M distributions are almost identical, just off by one full size") +
  theme_538()



Question 6: Distribution of AVG ENDOSCOPY HOURS x BIRTHSEX


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$PERFORMANCE_HOURS, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Performance Hours by Birth Sex",
          axis.titles = c('Performance Hour Bands'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Performance Hours is desired 


CrossTable(SURVEY$PERFORMANCE_HOURS, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  235 
## 
##  
##                          | SURVEY$BIRTHSEX 
## SURVEY$PERFORMANCE_HOURS |         F |         M | Row Total | 
## -------------------------|-----------|-----------|-----------|
##                     < 10 |        26 |        30 |        56 | 
##                          |     0.464 |     0.536 |     0.238 | 
##                          |     0.232 |     0.244 |           | 
## -------------------------|-----------|-----------|-----------|
##                    10-20 |        55 |        55 |       110 | 
##                          |     0.500 |     0.500 |     0.468 | 
##                          |     0.491 |     0.447 |           | 
## -------------------------|-----------|-----------|-----------|
##                    21-30 |        20 |        29 |        49 | 
##                          |     0.408 |     0.592 |     0.209 | 
##                          |     0.179 |     0.236 |           | 
## -------------------------|-----------|-----------|-----------|
##                    31-40 |         8 |         5 |        13 | 
##                          |     0.615 |     0.385 |     0.055 | 
##                          |     0.071 |     0.041 |           | 
## -------------------------|-----------|-----------|-----------|
##                     > 40 |         3 |         4 |         7 | 
##                          |     0.429 |     0.571 |     0.030 | 
##                          |     0.027 |     0.033 |           | 
## -------------------------|-----------|-----------|-----------|
##             Column Total |       112 |       123 |       235 | 
##                          |     0.477 |     0.523 |           | 
## -------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.264007     d.f. =  4     p =  0.6873295 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.6885774 
## 
## 



Question 7: Distribution of TEACHER SEX PEREFERENCE x BIRTHSEX


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$BIRTHSEX, SURVEY$TEACHER_GENDER_PREFERENCE,  margin = "row", 
          bar.pos = "stack", coord.flip = TRUE,
          title = "Teacher Sex Preference by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Teacher Sex Pref?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



Question 8A & 8B: Distribution of NUMBER OF TEACHERS BY SEX x BIRTHSEX


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$FEMALE_TRAINERS, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Number of Female Trainers by Birth Sex",
          axis.titles = c('Approx. Female Trainers'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Number of Female Trainers is desired 

CrossTable(SURVEY$FEMALE_TRAINERS, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##                        | SURVEY$BIRTHSEX 
## SURVEY$FEMALE_TRAINERS |         F |         M | Row Total | 
## -----------------------|-----------|-----------|-----------|
##                   None |         2 |         2 |         4 | 
##                        |     0.500 |     0.500 |     0.017 | 
##                        |     0.018 |     0.016 |           | 
## -----------------------|-----------|-----------|-----------|
##                    1-2 |        10 |        12 |        22 | 
##                        |     0.455 |     0.545 |     0.093 | 
##                        |     0.088 |     0.098 |           | 
## -----------------------|-----------|-----------|-----------|
##                    3-5 |        49 |        41 |        90 | 
##                        |     0.544 |     0.456 |     0.381 | 
##                        |     0.434 |     0.333 |           | 
## -----------------------|-----------|-----------|-----------|
##                   6-10 |        41 |        47 |        88 | 
##                        |     0.466 |     0.534 |     0.373 | 
##                        |     0.363 |     0.382 |           | 
## -----------------------|-----------|-----------|-----------|
##                   > 10 |        11 |        21 |        32 | 
##                        |     0.344 |     0.656 |     0.136 | 
##                        |     0.097 |     0.171 |           | 
## -----------------------|-----------|-----------|-----------|
##           Column Total |       113 |       123 |       236 | 
##                        |     0.479 |     0.521 |           | 
## -----------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  4.010492     d.f. =  4     p =  0.4045878 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.3982956 
## 
## 
plot_xtab(SURVEY$MALE_TRAINERS, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Number of Male Trainers by Birth Sex",
          axis.titles = c('Approx. Male Trainers'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Number of Male Trainers is desired 

CrossTable(SURVEY$MALE_TRAINERS, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  235 
## 
##  
##                      | SURVEY$BIRTHSEX 
## SURVEY$MALE_TRAINERS |         F |         M | Row Total | 
## ---------------------|-----------|-----------|-----------|
##                  1-2 |         0 |         2 |         2 | 
##                      |     0.000 |     1.000 |     0.009 | 
##                      |     0.000 |     0.016 |           | 
## ---------------------|-----------|-----------|-----------|
##                  3-5 |         8 |        11 |        19 | 
##                      |     0.421 |     0.579 |     0.081 | 
##                      |     0.071 |     0.090 |           | 
## ---------------------|-----------|-----------|-----------|
##                 6-10 |        53 |        58 |       111 | 
##                      |     0.477 |     0.523 |     0.472 | 
##                      |     0.469 |     0.475 |           | 
## ---------------------|-----------|-----------|-----------|
##                 > 10 |        52 |        51 |       103 | 
##                      |     0.505 |     0.495 |     0.438 | 
##                      |     0.460 |     0.418 |           | 
## ---------------------|-----------|-----------|-----------|
##         Column Total |       113 |       122 |       235 | 
##                      |     0.481 |     0.519 |           | 
## ---------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.36741     d.f. =  3     p =  0.4997301 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.644784 
## 
## 



Follow-Up Question: Is there a difference in Injury reports based on Training Level?


  #Is there a difference by Training Level?


plot_xtab(SURVEY$TRAINING_LEVEL,SURVEY$EVER_INJURED,  margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Ever Endo Injured by Training Level (3)",
          axis.titles = "Training Level", 
          legend.title= "Ever Injured?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

  # Relevel factor 
  
  INJ <-
    SURVEY %>% 
    select(EVER_INJURED, TRAINING_LEVEL, BIRTHSEX) %>%  
    mutate( TRAINING_LEVEL = factor(TRAINING_LEVEL, ordered = F),
            TRAINING_LEVEL = relevel( TRAINING_LEVEL, ref = 'First Year')) %>% drop_na() 
  
  
  injury.trng.glm <- glm( EVER_INJURED ~  TRAINING_LEVEL   , data=  INJ, family=binomial(link="logit"))
  summary(injury.trng.glm)
## 
## Call:
## glm(formula = EVER_INJURED ~ TRAINING_LEVEL, family = binomial(link = "logit"), 
##     data = INJ)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.6081  -0.6081  -0.5997  -0.4001   2.2649  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                -2.4849     0.4249  -5.848 4.98e-09 ***
## TRAINING_LEVELSecond Year   0.8910     0.5226   1.705   0.0882 .  
## TRAINING_LEVELAdvanced      0.8602     0.5221   1.647   0.0995 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 186.74  on 233  degrees of freedom
## Residual deviance: 182.88  on 231  degrees of freedom
## AIC: 188.88
## 
## Number of Fisher Scoring iterations: 5
  #Test to see whether Training Level is a significant effect overall:
  
  aod::wald.test(b = coef(injury.trng.glm), Sigma = vcov(injury.trng.glm), Terms = 2:3)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 3.4, df = 2, P(> X2) = 0.18
  injury.trng.emmeans <- emmeans( injury.trng.glm,  ~ c(TRAINING_LEVEL), type= "response")
  
  
  # Odds Ratio pairings:  
  pwpm( injury.trng.emmeans, reverse= T, adjust='none')
##             First Year Second Year Advanced
## First Year    [0.0769]      0.0882   0.0995
## Second Year       2.44    [0.1688]   0.9429
## Advanced          2.36        0.97 [0.1646]
## 
## Row and column labels: TRAINING_LEVEL
## Upper triangle: P values   null = 1
## Diagonal: [Estimates] (prob)   type = "response"
## Lower triangle: Comparisons (odds.ratio)   later vs. earlier
  pairs(injury.trng.emmeans, reverse=T, adjust= "none")
##  contrast                 odds.ratio    SE  df null z.ratio p.value
##  Second Year / First Year       2.44 1.274 Inf    1   1.705  0.0882
##  Advanced / First Year          2.36 1.234 Inf    1   1.647  0.0995
##  Advanced / Second Year         0.97 0.417 Inf    1  -0.072  0.9429
## 
## Tests are performed on the log odds ratio scale
  pairs(injury.trng.emmeans, reverse=F, adjust= "none")
##  contrast                 odds.ratio    SE  df null z.ratio p.value
##  First Year / Second Year      0.410 0.214 Inf    1  -1.705  0.0882
##  First Year / Advanced         0.423 0.221 Inf    1  -1.647  0.0995
##  Second Year / Advanced        1.031 0.443 Inf    1   0.072  0.9429
## 
## Tests are performed on the log odds ratio scale
 # First Year Respondents seem to have the *lowest* OR (1.09)  & probability (0.0769) of Injury.
 # How do First Years compare to everyone else? How does Advanced compare to everyone else?  
  

  a<- contrast(injury.trng.emmeans, list("Unadj OR: EVER_INJURED =Y on TRAINING_LEVEL = FirstYr vs. All" = c( 1, -1/2,  -1/2))) %>% tbl_df()
  b<- contrast(injury.trng.emmeans, list("Unadj OR: EVER_INJURED =Y on TRAINING_LEVEL = SecondYr vs. All" = c( -1/2, 1,  -1/2))) %>% tbl_df()
  c<- contrast(injury.trng.emmeans, list("Unadj OR: EVER_INJURED =Y on TRAINING_LEVEL = Advanced vs. All" = c( -1/2, -1/2,  1))) %>% tbl_df()
  
  rbind(a,b,c)[, -c(4:6)]
## # A tibble: 3 × 4
##   contrast                                                 odds.…¹    SE p.value
##   <fct>                                                      <dbl> <dbl>   <dbl>
## 1 Unadj OR: EVER_INJURED =Y on TRAINING_LEVEL = FirstYr v…   0.417 0.198  0.0659
## 2 Unadj OR: EVER_INJURED =Y on TRAINING_LEVEL = SecondYr …   1.59  0.636  0.250 
## 3 Unadj OR: EVER_INJURED =Y on TRAINING_LEVEL = Advanced …   1.51  0.606  0.300 
## # … with abbreviated variable name ¹​odds.ratio
  rm(a,b,c)


  • Interpretation: TRAINING_LEVEL is not a significant effect in predicting the likelihood of Injury EVER_INJURED. Despite the facts that (1) the prevalence of self-reported injuries increases with each step of Training Level (First Years are the least likely to be injured (odds=1.09 or 9%), compared to Second Years (odds=1.23 or 23%) and Advanced respondents (odds=1.22 or 23%); and (2) Second Years & Advanced respondents have a higher likelihood of injury when compared against all other groups, none of these results is statistically significant.

  • [Odds, not odds ratios, are calculated as the probability of an event occurring divided by the probability of that event not occurring. In the case of First Years: 0.0769 / (1 - 0.0769) = 0.0833, which when exponentiated yields 1.0869 – which can be expressed as a rounded percentage (+9%) or a rounded factor (1.09).]

  • The result that most closely approaches significance is “First Year Odds vs. All other Training Levels,” where with an OR of 0.417, First Years have an approximate 58% reduced chance of injury compared to their senior counterparts. (Due to fewer hours worked? Due to the nature of the role, which requires more observation and less practical/tactical interaction?)





Question 10A-10E: Transient Pain Immediately Following Session - Specific Body Part


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab( SURVEY$BIRTHSEX, SURVEY$EXPERIENCED_TRANSIENT_PAIN_HAND, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Transient Pain in Hand after Procedure by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Hand Pain?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Transient Pain in Neck/Shoulder after Procedure by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Neck/Should Pain?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX,SURVEY$EXPERIENCED_TRANSIENT_PAIN_BACK,  margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Transient Pain in Back after Procedure by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Back Pain?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$EXPERIENCED_TRANSIENT_PAIN_LEG, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Transient Pain in Leg after Procedure by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Leg Pain?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$EXPERIENCED_TRANSIENT_PAIN_FOOT,  margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Transient Pain in Foot after Procedure by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Foot Pain?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



Question 11: Told Injuries were “Growing Pains” x BIRTHSEX

(Exclude all respondents who did not report having been injured)


#SJPlot cross tabulation with Chi-Square/df
  

SUBSET <-  sqldf( "select BIRTHSEX,
                          GROWING_PAINS
                  from SURVEY
                  where GROWING_PAINS != 'NA' ")

SUBSET <- SUBSET %>% 
    mutate(GROWING_PAINS = recode_factor( GROWING_PAINS, "N" = "N",
                                                         "Y" = "Y")) %>% droplevels()

          

plot_xtab(SUBSET$BIRTHSEX, SUBSET$GROWING_PAINS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Told Injuries were Growing Pains by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Growing Pains?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



ENDOSCOPIC SUITE ENVIRONMENT

Question 1A & 1B: Distribution of FORMAL TRAINING & INFORMAL TRAINING by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$FELLOWSHIP_FORMAL_ERGO_TRAINING, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Formal Ergo Training by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Formal Ergo Traiing?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab( SURVEY$BIRTHSEX, SURVEY$INFORMAL_TRAINING,margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Informal Ergo Training by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Informal Ergo Training?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



Question 2A-2H: Distribution of TECHNIQUES DISCUSSED DURING TRAINING by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_POSTURAL, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Postural Awareness by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Training Postural Awareness?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_BEDHEIGHT, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Bed Height Adjustments by Birth Sex",
          axis.titles = "Birth Sex",  
          legend.title= "Training Bed Height?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_BEDANGLE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Bed Angle Adjustments by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Training Bed Angle?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_MONITORHEIGHT, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Monitor Height Adjustments by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Training Monitor Height?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_MUSCULOSKELETAL, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Musculoskeletal Maneuvers by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Training Musculoskeletal?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_EXERCISE_STRETCHING, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Exercise/Stretching by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Training Exer/Stretch?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab( SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_DIAL_EXTENDERS,margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Dial Extenders by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Training Dial Ext?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Pediatric Colonoscopes by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Traiing Pedi Coloscope?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



Question 3: Distribution of BUDGET ALLOTMENT by BIRTH SEX


plot_xtab(SURVEY$ERGO_TRAINING_BUDGET, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Ergonomic Training Budget by Birth Sex",
          axis.titles = c('Ergonomic Training Budget?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Ergo Training Budget is desired 


CrossTable(SURVEY$ERGO_TRAINING_BUDGET, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##                             | SURVEY$BIRTHSEX 
## SURVEY$ERGO_TRAINING_BUDGET |         F |         M | Row Total | 
## ----------------------------|-----------|-----------|-----------|
##                           Y |         2 |         0 |         2 | 
##                             |     1.000 |     0.000 |     0.008 | 
##                             |     0.018 |     0.000 |           | 
## ----------------------------|-----------|-----------|-----------|
##                           N |        30 |        34 |        64 | 
##                             |     0.469 |     0.531 |     0.271 | 
##                             |     0.265 |     0.276 |           | 
## ----------------------------|-----------|-----------|-----------|
##                          DK |        81 |        89 |       170 | 
##                             |     0.476 |     0.524 |     0.720 | 
##                             |     0.717 |     0.724 |           | 
## ----------------------------|-----------|-----------|-----------|
##                Column Total |       113 |       123 |       236 | 
##                             |     0.479 |     0.521 |           | 
## ----------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.206704     d.f. =  2     p =  0.3317572 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.5002872 
## 
## 



Question 4A & 4B: Distribution of FEEBACK FREQUENCY & FEEDBACK BY WHOM by BIRTH SEX


plot_xtab(SURVEY$ERGO_FEEDBACK, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Ergo Feedback Frequency by Birth Sex",
          axis.titles = c('How Frequently Ergo Feedback?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Ergo Feedback Frequency is desired 


CrossTable(SURVEY$ERGO_FEEDBACK, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##                      | SURVEY$BIRTHSEX 
## SURVEY$ERGO_FEEDBACK |         F |         M | Row Total | 
## ---------------------|-----------|-----------|-----------|
##                Never |         2 |         4 |         6 | 
##                      |     0.333 |     0.667 |     0.025 | 
##                      |     0.018 |     0.033 |           | 
## ---------------------|-----------|-----------|-----------|
##               Rarely |        37 |        46 |        83 | 
##                      |     0.446 |     0.554 |     0.352 | 
##                      |     0.327 |     0.374 |           | 
## ---------------------|-----------|-----------|-----------|
##            Sometimes |        57 |        61 |       118 | 
##                      |     0.483 |     0.517 |     0.500 | 
##                      |     0.504 |     0.496 |           | 
## ---------------------|-----------|-----------|-----------|
##                Often |        17 |        12 |        29 | 
##                      |     0.586 |     0.414 |     0.123 | 
##                      |     0.150 |     0.098 |           | 
## ---------------------|-----------|-----------|-----------|
##         Column Total |       113 |       123 |       236 | 
##                      |     0.479 |     0.521 |           | 
## ---------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.22049     d.f. =  3     p =  0.5279237 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.548075 
## 
## 
plot_xtab(SURVEY$ERGO_FEEDBACK_BY_WHOM, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Who Provides Ergo Feedback by Birth Sex",
          axis.titles = c('Who Provides Feedback?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Ergo Feedback by Whom is desired 


CrossTable(SURVEY$ERGO_FEEDBACK_BY_WHOM, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##                                 | SURVEY$BIRTHSEX 
##    SURVEY$ERGO_FEEDBACK_BY_WHOM |         F |         M | Row Total | 
## --------------------------------|-----------|-----------|-----------|
## Do not/rarely received feedback |        15 |        22 |        37 | 
##                                 |     0.405 |     0.595 |     0.157 | 
##                                 |     0.133 |     0.179 |           | 
## --------------------------------|-----------|-----------|-----------|
##            Mostly male teachers |        17 |        22 |        39 | 
##                                 |     0.436 |     0.564 |     0.165 | 
##                                 |     0.150 |     0.179 |           | 
## --------------------------------|-----------|-----------|-----------|
##          Mostly female teachers |        20 |        16 |        36 | 
##                                 |     0.556 |     0.444 |     0.153 | 
##                                 |     0.177 |     0.130 |           | 
## --------------------------------|-----------|-----------|-----------|
##                    Both equally |        61 |        63 |       124 | 
##                                 |     0.492 |     0.508 |     0.525 | 
##                                 |     0.540 |     0.512 |           | 
## --------------------------------|-----------|-----------|-----------|
##                    Column Total |       113 |       123 |       236 | 
##                                 |     0.479 |     0.521 |           | 
## --------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.021954     d.f. =  3     p =  0.5678626 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.5778785 
## 
## 



Question 5A - 5G: Distribution of EQUIPMENT OPITIMIZATOIN & AVAILABILITY by BIRTH SEX


plot_xtab( SURVEY$BIRTHSEX, SURVEY$ERGO_OPTIMIZATION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Ergonomically Optimized Equipment by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Ergo Optimization?",
          geom.colors = c("#006cc5", "lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$GLOVE_SIZE_AVAILABLE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Glove Size Availability by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Glove Size Avail?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$DIAL_EXTENDERS_AVAILABLE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Dial Extender Availability by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Dial Ext Avail?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET <-  sqldf( "select BIRTHSEX,
                          DIAL_EXTENDERS_ENCOURAGED
                  from SURVEY
                  where DIAL_EXTENDERS_ENCOURAGED != 'DU' ")

SUBSET <- SUBSET %>% 
    mutate(DIAL_EXTENDERS_ENCOURAGED = recode_factor( DIAL_EXTENDERS_ENCOURAGED, "N" = "N",
                                                         "Y" = "Y")) %>% droplevels()



plot_xtab(SUBSET$BIRTHSEX, SUBSET$DIAL_EXTENDERS_ENCOURAGED, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Dial Extenders Encouraged by Birth Sex - (Includes only subjects who use Dial Extenders)",
          axis.titles = "Birth Sex", 
          legend.title= "Dial Ext Encouraged?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET <-  sqldf( "select BIRTHSEX,
                          DIAL_EXTENDERS_FEMALEATT
                  from SURVEY
                  where DIAL_EXTENDERS_FEMALEATT != 'NA' ")

SUBSET <- SUBSET %>% 
    mutate(DIAL_EXTENDERS_FEMALEATT = recode_factor( DIAL_EXTENDERS_FEMALEATT, "N" = "N",
                                                         "Y" = "Y")) %>% droplevels()



plot_xtab(SUBSET$BIRTHSEX, SUBSET$DIAL_EXTENDERS_FEMALEATT, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Dial Extenders Encouraged with Female Att by Birth Sex - (Includes only subjects who use Dial Extenders)",
          axis.titles = "Birth Sex", 
          legend.title= "Dial Ext FemAtt?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET <-  sqldf( "select BIRTHSEX,
                          DIAL_EXTENDERS_MALEATT
                  from SURVEY
                  where DIAL_EXTENDERS_MALEATT != 'NA' ")

SUBSET <- SUBSET %>% 
    mutate(DIAL_EXTENDERS_MALEATT = recode_factor( DIAL_EXTENDERS_MALEATT, "N" = "N",
                                                         "Y" = "Y")) %>% droplevels()



plot_xtab(SUBSET$BIRTHSEX, SUBSET$DIAL_EXTENDERS_MALEATT, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Dial Extenders Encouraged with Male Att by Birth Sex - (Includes only subjects who use Dial Extenders)",
          axis.titles = "Birth Sex", 
          legend.title= "Dial Ext MaleAtt?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$PEDI_COLONOSCOPES_AVAILABLE, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Pediatric Colonoscopes by Birth Sex",
          axis.titles = c('Pedi Colonoscopes Available?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Pedi Colonoscopes is desired 


CrossTable(SURVEY$PEDI_COLONOSCOPES_AVAILABLE, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  235 
## 
##  
##                                    | SURVEY$BIRTHSEX 
## SURVEY$PEDI_COLONOSCOPES_AVAILABLE |         F |         M | Row Total | 
## -----------------------------------|-----------|-----------|-----------|
##                                  Y |       107 |       122 |       229 | 
##                                    |     0.467 |     0.533 |     0.974 | 
##                                    |     0.955 |     0.992 |           | 
## -----------------------------------|-----------|-----------|-----------|
##                                  N |         2 |         0 |         2 | 
##                                    |     1.000 |     0.000 |     0.009 | 
##                                    |     0.018 |     0.000 |           | 
## -----------------------------------|-----------|-----------|-----------|
##                                 DK |         3 |         1 |         4 | 
##                                    |     0.750 |     0.250 |     0.017 | 
##                                    |     0.027 |     0.008 |           | 
## -----------------------------------|-----------|-----------|-----------|
##                       Column Total |       112 |       123 |       235 | 
##                                    |     0.477 |     0.523 |           | 
## -----------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  3.475254     d.f. =  2     p =  0.1759374 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.1735968 
## 
## 



Question 6A - 6I: Distribution of TYPE OF LEAD APRON AVAILABILITY by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_AVAILABLE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Well-Fitted Lead Aprons Available",
          axis.titles = "Birth Sex", 
          legend.title= "Well-Fitted Lead Aprons?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab( SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_LW_ONEPIECE,margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons LW One-Piece Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons LW 1P?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_LW_TWOPIECE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons LW Two-Piece Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons LW 2P?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_STANDARD_ONEPIECE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons SW One-Piece Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons Std 1P?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_STANDARD_TWOPIECE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons SW Two-Piece Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons Std 2P?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_DOUBLE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons Double Lead (Maternity) Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons Double?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_THYROID, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons Thyroid Shield Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons Thyroid?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_MATERNALDOS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons Maternal Dosimeter Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons Maternal?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_FETALDOS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons Fetal Dosimeter Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons Fetal?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



Question 7A - 7G: Distribution of ERGONOMIC CONSIDERATIONS by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$ERGO_FORMAL_TIMEOUT_PRIOR, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Formal Ergo Timeouts at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Formal Ergo Timeout?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

CrossTable(SURVEY$ERGO_FORMAL_TIMEOUT_PRIOR, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T) 
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  234 
## 
##  
##                                  | SURVEY$BIRTHSEX 
## SURVEY$ERGO_FORMAL_TIMEOUT_PRIOR |         F |         M | Row Total | 
## ---------------------------------|-----------|-----------|-----------|
##                                N |       104 |       115 |       219 | 
##                                  |     0.475 |     0.525 |     0.936 | 
##                                  |     0.929 |     0.943 |           | 
## ---------------------------------|-----------|-----------|-----------|
##                                Y |         8 |         7 |        15 | 
##                                  |     0.533 |     0.467 |     0.064 | 
##                                  |     0.071 |     0.057 |           | 
## ---------------------------------|-----------|-----------|-----------|
##                     Column Total |       112 |       122 |       234 | 
##                                  |     0.479 |     0.521 |           | 
## ---------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  0.1921786     d.f. =  1     p =  0.6611095 
## 
## Pearson's Chi-squared test with Yates' continuity correction 
## ------------------------------------------------------------
## Chi^2 =  0.02932413     d.f. =  1     p =  0.8640328 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Sample estimate odds ratio:  0.7921006 
## 
## Alternative hypothesis: true odds ratio is not equal to 1
## p =  0.7911129 
## 95% confidence interval:  0.2356057 2.596761 
## 
## Alternative hypothesis: true odds ratio is less than 1
## p =  0.431063 
## 95% confidence interval:  0 2.184852 
## 
## Alternative hypothesis: true odds ratio is greater than 1
## p =  0.75966 
## 95% confidence interval:  0.2822357 Inf 
## 
## 
## 
plot_xtab(SURVEY$BIRTHSEX, SURVEY$ERGO_INFORMAL_TIMEOUT_PRIOR, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Informal Ergo Timeouts at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Informal Ergo Timeout?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab( SURVEY$BIRTHSEX, SURVEY$MONITORS_ADJUSTABLE,margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Monitors Easily Adjustable at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Monitors Easily Adjust?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TEACHER_SENSITIVITY_STATURE_HANDSIZE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Teachers Train with Sensitivity to Stature/Hand Size by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Teacher Sensitivity to Stature?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

# Is there a Height or Glove Size difference on this question, independent of sex ?





plot_xtab(SURVEY$HEIGHT2, SURVEY$TEACHER_SENSITIVITY_STATURE_HANDSIZE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Teachers Train with Sensitivity to Stature/Hand Size by Height Band",
          axis.titles = "Height Band", 
          legend.title= "Teacher Sensitivity to Stature?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

x <-
SURVEY %>% 
   mutate(HTCAT = case_when( is.na(HEIGHT2) ~ NA_real_,
                     HEIGHT2 %in% c( "< 5'" , "5-5'3" ,"5'4-5'6" ) ~ 1,
                     TRUE ~ 2),
          
          GLOVECAT = case_when( is.na(GLOVE) ~ NA_real_,
                     GLOVE >=7 ~ 2,
                     TRUE ~ 1) ) %>% select(HTCAT, GLOVECAT, TEACHER_SENSITIVITY_STATURE_HANDSIZE) %>% drop_na()
  


plot_xtab(x$HTCAT, x$TEACHER_SENSITIVITY_STATURE_HANDSIZE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Teachers Train with Sensitivity to Stature/Hand Size by Height Band v2",
          axis.titles = "Height Band Category", 
          axis.labels = c("Shorter than 5'7", "5'7 & Taller"),
          legend.title= "Teacher Sensitivity to Stature?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$GLOVE, SURVEY$TEACHER_SENSITIVITY_STATURE_HANDSIZE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Teachers Train with Sensitivity to Stature/Hand Size by Glove Size",
          axis.titles = "Glove Size", 
          legend.title= "Teacher Sensitivity to Stature?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(x$GLOVECAT, x$TEACHER_SENSITIVITY_STATURE_HANDSIZE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Teachers Train with Sensitivity to Stature/Hand Size by Glove Size v2",
          axis.titles = "Glove Size Category", 
          axis.labels = c("Less than Size 7", "Size 7 & Above"),
          legend.title= "Teacher Sensitivity to Stature?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

rm(x)


plot_xtab(SURVEY$TEACHER_SENSITIVITY_BY_GENDER, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Sex of Sensitive Teachers by Birth Sex",
          axis.titles = c('Male or Female Teachers More Sensitive?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Male or Female Teacher more Sensitive is desired 

CrossTable(SURVEY$TEACHER_SENSITIVITY_BY_GENDER, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  234 
## 
##  
##                                      | SURVEY$BIRTHSEX 
## SURVEY$TEACHER_SENSITIVITY_BY_GENDER |         F |         M | Row Total | 
## -------------------------------------|-----------|-----------|-----------|
##                                 Male |         8 |         8 |        16 | 
##                                      |     0.500 |     0.500 |     0.068 | 
##                                      |     0.071 |     0.066 |           | 
## -------------------------------------|-----------|-----------|-----------|
##                               Female |        23 |        10 |        33 | 
##                                      |     0.697 |     0.303 |     0.141 | 
##                                      |     0.205 |     0.082 |           | 
## -------------------------------------|-----------|-----------|-----------|
##                         Both Equally |        55 |        72 |       127 | 
##                                      |     0.433 |     0.567 |     0.543 | 
##                                      |     0.491 |     0.590 |           | 
## -------------------------------------|-----------|-----------|-----------|
##             Never had female teacher |         3 |         2 |         5 | 
##                                      |     0.600 |     0.400 |     0.021 | 
##                                      |     0.027 |     0.016 |           | 
## -------------------------------------|-----------|-----------|-----------|
##                             Not Sure |        23 |        30 |        53 | 
##                                      |     0.434 |     0.566 |     0.226 | 
##                                      |     0.205 |     0.246 |           | 
## -------------------------------------|-----------|-----------|-----------|
##                         Column Total |       112 |       122 |       234 | 
##                                      |     0.479 |     0.521 |           | 
## -------------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  8.108789     d.f. =  4     p =  0.08767341 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.07838717 
## 
## 
RECODE <-
  SURVEY %>% 
  select( BIRTHSEX, TACTILE_INSTRUCTION_FROM_FEMALES, TACTILE_INSTRUCTION_FROM_MALES ) %>% 
    
  mutate_at( vars( contains("TACTILE_INST")), funs( case_when (is.na(.) ~ NA_character_,
                                                         . %in% c('Often', 'Rarely') ~ 'Yes',
                                                           TRUE ~ 'No')))



plot_xtab(SURVEY$BIRTHSEX, SURVEY$TACTILE_INSTRUCTION_FROM_MALES, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Tactile Instruction from Male Teachers by Birth Sex (3 Levels)",
          axis.titles = "Birth Sex", 
          legend.title= "Tactile Instruction from Males?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(RECODE$BIRTHSEX, RECODE$TACTILE_INSTRUCTION_FROM_MALES, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Tactile Instruction from Male Teachers by Birth Sex (2 Levels)",
          axis.titles = "Birth Sex", 
          legend.title= "Tactile Instruction from Males?",
          geom.colors = c("#006cc5", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TACTILE_INSTRUCTION_FROM_FEMALES, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Tactile Instruction from Female Teachers by Birth Sex (3 Levels)",
          axis.titles = "Birth Sex", 
          legend.title= "Tactile Instruction from Females?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(RECODE$BIRTHSEX, RECODE$TACTILE_INSTRUCTION_FROM_FEMALES, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Tactile Instruction from Female Teachers by Birth Sex (2 Levels)",
          axis.titles = "Birth Sex", 
          legend.title= "Tactile Instruction from Females?",
          geom.colors = c("#006cc5", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



Question 8A - 8I: Distribution of RESPECT-ORIENTED ANSWERS by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$COMFORTABLE_ASKING_NURSES,  margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Comfortable Asking Nurses for Help by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Comfortable Asking Nurses?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET <-  sqldf( "select BIRTHSEX,
                          COMFORTABLE_ASKING_TECHS
                  from SURVEY
                  where COMFORTABLE_ASKING_TECHS != 'NA' ")

SUBSET <- SUBSET %>% 
    mutate(COMFORTABLE_ASKING_TECHS = recode_factor( COMFORTABLE_ASKING_TECHS, "N" = "N",
                                                         "Y" = "Y")) %>% droplevels()



plot_xtab(SUBSET$BIRTHSEX, SUBSET$COMFORTABLE_ASKING_TECHS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Comfortable Asking Techs for Help by Birth Sex (Includes only respondents with Techs)",
          axis.titles = "Birth Sex", 
          legend.title= "Comfortable Asking Techs?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab( SURVEY$BIRTHSEX, SURVEY$NURSES_ASKING, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Times Asking Nurses for Help by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Times Asking Nurses?",
          geom.colors = c("#006cc5", "lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$MALE_ATTENDINGS_ASKING, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Times Male Attending Asking Nurses for Help by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Times Asking MaleAtt?",
          geom.colors = c("#006cc5", "lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

CrossTable(SURVEY$MALE_ATTENDINGS_ASKING, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  229 
## 
##  
##                               | SURVEY$BIRTHSEX 
## SURVEY$MALE_ATTENDINGS_ASKING |         F |         M | Row Total | 
## ------------------------------|-----------|-----------|-----------|
##                          Once |        49 |        63 |       112 | 
##                               |     0.438 |     0.562 |     0.489 | 
##                               |     0.454 |     0.521 |           | 
## ------------------------------|-----------|-----------|-----------|
##                         Twice |        18 |        26 |        44 | 
##                               |     0.409 |     0.591 |     0.192 | 
##                               |     0.167 |     0.215 |           | 
## ------------------------------|-----------|-----------|-----------|
##               More than twice |        41 |        32 |        73 | 
##                               |     0.562 |     0.438 |     0.319 | 
##                               |     0.380 |     0.264 |           | 
## ------------------------------|-----------|-----------|-----------|
##                  Column Total |       108 |       121 |       229 | 
##                               |     0.472 |     0.528 |           | 
## ------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  3.587705     d.f. =  2     p =  0.1663182 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.1745347 
## 
## 
plot_xtab(SURVEY$FEMALE_ATTENDINGS_ASKING, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Times Female Attending Asking Nurses for Help by Birth Sex",
          axis.titles = c('Times Female Att Asking Nurses?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

CrossTable(SURVEY$FEMALE_ATTENDINGS_ASKING, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  229 
## 
##  
##                                 | SURVEY$BIRTHSEX 
## SURVEY$FEMALE_ATTENDINGS_ASKING |         F |         M | Row Total | 
## --------------------------------|-----------|-----------|-----------|
##                            Once |        42 |        51 |        93 | 
##                                 |     0.452 |     0.548 |     0.406 | 
##                                 |     0.389 |     0.421 |           | 
## --------------------------------|-----------|-----------|-----------|
##                           Twice |        25 |        32 |        57 | 
##                                 |     0.439 |     0.561 |     0.249 | 
##                                 |     0.231 |     0.264 |           | 
## --------------------------------|-----------|-----------|-----------|
##                 More than Twice |        37 |        36 |        73 | 
##                                 |     0.507 |     0.493 |     0.319 | 
##                                 |     0.343 |     0.298 |           | 
## --------------------------------|-----------|-----------|-----------|
##          Don't work with FemAtt |         4 |         2 |         6 | 
##                                 |     0.667 |     0.333 |     0.026 | 
##                                 |     0.037 |     0.017 |           | 
## --------------------------------|-----------|-----------|-----------|
##                    Column Total |       108 |       121 |       229 | 
##                                 |     0.472 |     0.528 |           | 
## --------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  1.6784     d.f. =  3     p =  0.6417466 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.6522275 
## 
## 
plot_xtab(SURVEY$BIRTHSEX, SURVEY$RECOGNIZED_RESPECTED_ES_STAFF, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Recognized/Respected by ES Staff by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Recog by ES Staff?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$RECOGNIZED_RESPECTED_ANESTHETISTS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Recognized/Respected by Anesthetists by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Recog by Anesthetists?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab( SURVEY$BIRTHSEX, SURVEY$RECOGNIZED_RESPECTED_GASTRO_ATTENDING,margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Recognized/Respected by Gastro Attending by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Recog by Gastro Att?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$FIRST_NAME_NO_PERMISSION,  margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "First Name Used No Permission by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "First Name No Permission?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Is there a RACE difference on this question ?


plot_xtab(SURVEY$RACE2, SURVEY$FIRST_NAME_NO_PERMISSION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "First Name Used No Permission by Race (Broad)",
          axis.titles = "Binary Race Category", 
          legend.title= "First Name Used No Permission?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Is there a TRAINING LEVEL difference on this question ?


plot_xtab(SURVEY$TRAINING_LEVEL, SURVEY$FIRST_NAME_NO_PERMISSION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "First Name Used No Permission by Training Level",
          axis.titles = "Binary Race Category", 
          legend.title= "First Name Used No Permission?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Is there a HEIGHT BAND difference on this question ?


plot_xtab(SURVEY$HEIGHT2, SURVEY$FIRST_NAME_NO_PERMISSION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "First Name Used No Permission by Height Band",
          axis.titles = "Height Band", 
          legend.title= "First Name Used No Permission?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

x <-
SURVEY %>% 
   mutate(HTCAT = case_when( is.na(HEIGHT2) ~ NA_real_,
                     HEIGHT2 %in% c( "< 5'" , "5-5'3" ,"5'4-5'6" ) ~ 1,
                     TRUE ~ 2) ) %>% select(HTCAT, FIRST_NAME_NO_PERMISSION) %>% drop_na()
  
  
  
  
plot_xtab( x$HTCAT, x$FIRST_NAME_NO_PERMISSION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "First Name Used No Permission by Height Band v2",
          axis.titles = "Height Band Category", 
          axis.labels = c("Less than 5'7", "5'7 & Above"),
          legend.title= "First Name Used No Permission?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

rm(x)



plot_xtab( SURVEY$HTCATSEX, SURVEY$FIRST_NAME_NO_PERMISSION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "First Name Used No Permission by Sex-Adjusted Height Band v3",
          axis.titles = "Sex-Adjusted Height Band Category", 
          legend.title= "First Name Used No Permission?",
          axis.labels = c("Tall", "Short", "Average"),
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



FUTURE DESIRES FOR ENDOSCOPY SUITE

Question 1: Distribution of MANDATORY ERGO TRAINING by BIRTH SEX


plot_xtab(SURVEY$ERGO_TRAINING_MANDATORY, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Should Ergo Training be Mandatory by Birth Sex",
          axis.titles = c('Mandatory Ergo Training?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Ergo Training be Mandatory is desired 

CrossTable(SURVEY$ERGO_TRAINING_MANDATORY, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T , prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  234 
## 
##  
##                                | SURVEY$BIRTHSEX 
## SURVEY$ERGO_TRAINING_MANDATORY |         F |         M | Row Total | 
## -------------------------------|-----------|-----------|-----------|
##                              Y |       112 |       116 |       228 | 
##                                |     0.491 |     0.509 |     0.974 | 
##                                |     0.991 |     0.959 |           | 
## -------------------------------|-----------|-----------|-----------|
##                              N |         0 |         2 |         2 | 
##                                |     0.000 |     1.000 |     0.009 | 
##                                |     0.000 |     0.017 |           | 
## -------------------------------|-----------|-----------|-----------|
##                             DK |         1 |         3 |         4 | 
##                                |     0.250 |     0.750 |     0.017 | 
##                                |     0.009 |     0.025 |           | 
## -------------------------------|-----------|-----------|-----------|
##                   Column Total |       113 |       121 |       234 | 
##                                |     0.483 |     0.517 |           | 
## -------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.799944     d.f. =  2     p =  0.2466039 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.3718787 
## 
## 

Question 2: Distribution of ERGO BUDGET FOR OPTIMIZATION by BIRTH SEX


plot_xtab(SURVEY$ERGO_OPTIMIZAITON_BUDGET_REQUIRED, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Should Budget Include Ergo Optimiation by Birth Sex",
          axis.titles = c('Budget Should Include Ergo Opti?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Ergo Optimization Budget be Mandatory is desired 

CrossTable(SURVEY$ERGO_OPTIMIZAITON_BUDGET_REQUIRED, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  233 
## 
##  
##                                          | SURVEY$BIRTHSEX 
## SURVEY$ERGO_OPTIMIZAITON_BUDGET_REQUIRED |         F |         M | Row Total | 
## -----------------------------------------|-----------|-----------|-----------|
##                                        Y |       109 |       106 |       215 | 
##                                          |     0.507 |     0.493 |     0.923 | 
##                                          |     0.965 |     0.883 |           | 
## -----------------------------------------|-----------|-----------|-----------|
##                                        N |         0 |         3 |         3 | 
##                                          |     0.000 |     1.000 |     0.013 | 
##                                          |     0.000 |     0.025 |           | 
## -----------------------------------------|-----------|-----------|-----------|
##                                       DK |         4 |        11 |        15 | 
##                                          |     0.267 |     0.733 |     0.064 | 
##                                          |     0.035 |     0.092 |           | 
## -----------------------------------------|-----------|-----------|-----------|
##                             Column Total |       113 |       120 |       233 | 
##                                          |     0.485 |     0.515 |           | 
## -----------------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  6.103736     d.f. =  2     p =  0.04727055 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.04412296 
## 
## 

Question 3: Distribution of INCREASED AVAILABILITY OF DIAL EXTENDERS by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$EXPERIENCE_IMPROVED_DIAL_EXTENDERS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Increased Availability of Dial Extenders by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Increase Avail Dial Ext?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 4: Distribution of INCREASED AVAILABILITY OF PEDI COLONSCOPES by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Increased Availability of Pedi Colonoscopes by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Increase Avail Pediscopes?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 5: Distribution of INCREASED AVAILABILITY OF LEAD APRONS by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$EXPERIENCE_IMPROVED_APRONS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Increased Availability of Lead Aprons by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Improve Aprons?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 6: Distribution of ERGO TRAINING REQUIRED FOR TEACHERS by BIRTH SEX


plot_xtab(SURVEY$ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Formal Ergo Training Required for Teachers by Birth Sex",
          axis.titles = c('Ergo Training Required for Teachers?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Formal Teacher Training Mandatory is desired 

CrossTable(SURVEY$ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  234 
## 
##  
##                                               | SURVEY$BIRTHSEX 
## SURVEY$ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED |         F |         M | Row Total | 
## ----------------------------------------------|-----------|-----------|-----------|
##                                             Y |       103 |       105 |       208 | 
##                                               |     0.495 |     0.505 |     0.889 | 
##                                               |     0.912 |     0.868 |           | 
## ----------------------------------------------|-----------|-----------|-----------|
##                                             N |         1 |         4 |         5 | 
##                                               |     0.200 |     0.800 |     0.021 | 
##                                               |     0.009 |     0.033 |           | 
## ----------------------------------------------|-----------|-----------|-----------|
##                                            DK |         9 |        12 |        21 | 
##                                               |     0.429 |     0.571 |     0.090 | 
##                                               |     0.080 |     0.099 |           | 
## ----------------------------------------------|-----------|-----------|-----------|
##                                  Column Total |       113 |       121 |       234 | 
##                                               |     0.483 |     0.517 |           | 
## ----------------------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  1.976608     d.f. =  2     p =  0.3722074 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.4698464 
## 
## 


ERGONOMIC KNOWLEDGE

Question 1: Importance of Ergonomics in Relation to ERI


plot_xtab(SURVEY$BIRTHSEX, SURVEY$ERGONOMIC_IMPORTANCE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Importance of Ergonomics in Relation to ERI",
          axis.titles = "Birth Sex", 
          legend.title= "Ergo Importance Response",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 2: Mitigation Stategies to Reduce Muscle Strain Risk


plot_xtab(SURVEY$BIRTHSEX, SURVEY$MITIGATING_MUSCLE_STRAIN, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Mitigation Strategies to Reduce Muscle Strain Risk",
          axis.titles = "Birth Sex", 
          legend.title= "Muscle Strain Response",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 3: Mitigation Stategies to Reduce Muscle Strain Risk


plot_xtab(SURVEY$BIRTHSEX, SURVEY$BED_POSITION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Bed Position in Relation to the Elbow",
          axis.titles = "Birth Sex", 
          legend.title= "Bed Position Response",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 4: Best Position for Endoscopy Trainer


plot_xtab(SURVEY$BIRTHSEX, SURVEY$ENDO_TRAINER_POSITION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Best Position for Endoscopy Trainer",
          axis.titles = "Birth Sex", 
          legend.title= "Trainer Position Response",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 5: Best Time to Explore Disability Insurance


plot_xtab(SURVEY$BIRTHSEX, SURVEY$WHEN_DISABILITY_INSURANCE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Best Time to Explore Disability Insurance",
          axis.titles = "Birth Sex", 
          legend.title= "When Disab Ins Response",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

# CORRECT <-
#   
#   SURVEY %>% 
#   mutate(CORRECT = across(.cols =  ERGONOMIC_IMPORTANCE: WHEN_DISABILITY_INSURANCE , .fns = str_count, "Correct")) %>%
#           rowwise() %>%
#           mutate(COUNT_CORRECT = across(.cols = contains("Correct"), .fns = sum)) %>% 
#           select (BIRTHSEX, ERGONOMIC_IMPORTANCE: WHEN_DISABILITY_INSURANCE, COUNT_CORRECT) %>% 
#           mutate( COUNT_CORRECT = as.integer(COUNT_CORRECT) )


# CORRECT <-
#    SURVEY %>%
#     select (BIRTHSEX, ERGONOMIC_IMPORTANCE: WHEN_DISABILITY_INSURANCE,) %>%
#     mutate( COUNT_CORRECT = apply( . , 1, function(x) sum( x== "Correct", na.rm= T )))


# CORRECT <- 
# SURVEY %>%   select( ERGONOMIC_IMPORTANCE: WHEN_DISABILITY_INSURANCE) %>% 
#   rowwise() %>%
#   mutate(COUNT_CORRECT = sum(c_across( ERGONOMIC_IMPORTANCE: WHEN_DISABILITY_INSURANCE ) == "Correct"))



Ergo Quiz Score Results by BIRTHSEX and by TRAINING LEVEL:


CORRECT <-
   SURVEY %>% 
    select (RECORD_ID, BIRTHSEX, TRAINING_LEVEL,ERGO_OPTIMIZATION, ERGONOMIC_IMPORTANCE: WHEN_DISABILITY_INSURANCE,) %>% 
    mutate( ERGO_KNOWLEDGE_SELF_REPORT = case_when( ERGO_OPTIMIZATION == "Y" ~ "Y",
                                                        TRUE ~ "N" ),
            COUNT_CORRECT = rowSums( . == "Correct" )) 


eov.ttest(CORRECT, COUNT_CORRECT, BIRTHSEX)  
## [1] "F Test p.value = 0.4180924  EOV = TRUE (Pooled)"
## [1] "CORRECT : COUNT_CORRECT ~ BIRTHSEX"
## 
##  Two Sample t-test
## 
## data:  CORRECT : COUNT_CORRECT ~ BIRTHSEX
## t = 1.2945, df = 233, p-value = 0.1968
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -0.1049946  0.5072867
## sample estimates:
## mean in group F mean in group M 
##        3.504425        3.303279
ggbetweenstats( data= CORRECT,
                x = BIRTHSEX,
                y = COUNT_CORRECT,
                type="parametric",
                p.adjust.method = "none",
                title = "Mean Scores by Birth Sex", 
                bf.message=F)

#bartlett.test( COUNT_CORRECT ~  TRAINING_LEVEL , data = CORRECT3)
#Anova( lm( COUNT_CORRECT ~ TRAINING_LEVEL, data=CORRECT), type=3)

#Welch's Anova - to match ggbetween stats (unequal variances assumed in plot)
oneway.test(COUNT_CORRECT ~ TRAINING_LEVEL, data = CORRECT, var.equal = FALSE)
## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  COUNT_CORRECT and TRAINING_LEVEL
## F = 2.0749, num df = 2.0, denom df = 154.6, p-value = 0.1291
pairs(emmeans(lm( COUNT_CORRECT ~ TRAINING_LEVEL, data=CORRECT), spec='TRAINING_LEVEL'), adjust='BH')
##  contrast                 estimate    SE  df t.ratio p.value
##  First Year - Second Year  -0.3423 0.190 232  -1.802  0.1238
##  First Year - Advanced     -0.3291 0.189 232  -1.744  0.1238
##  Second Year - Advanced     0.0132 0.190 232   0.069  0.9449
## 
## P value adjustment: BH method for 3 tests
# Mean for First Years compared to All Other Years
contrast(emmeans(lm( COUNT_CORRECT ~ TRAINING_LEVEL, data=CORRECT), spec='TRAINING_LEVEL'),
         list('Unadj Means First Yr vs All Others' = c( 1, -1/2, -1/2)))
##  contrast                           estimate    SE  df t.ratio p.value
##  Unadj Means First Yr vs All Others   -0.336 0.164 232  -2.049  0.0416
ggbetweenstats( data= CORRECT,
                x = TRAINING_LEVEL,
                y = COUNT_CORRECT,
                type="parametric",
                p.adjust.method = "BH",
                title = "Mean Scores by Training Level (3)", 
                bf.message=F)

CORRECT3 <-
    CORRECT %>% 
      select( RECORD_ID, BIRTHSEX, TLEVEL=TRAINING_LEVEL, COUNT_CORRECT) %>% 
      left_join( GENDER_DIFF_DATA_LABELS[, c("RECORD_ID", "TRAINING_LEVEL")], by="RECORD_ID") %>% 
      mutate(TRAINING_LEVEL = factor (TRAINING_LEVEL, levels= c('First year fellow','Second year fellow', 'Third year fellow', 'Advanced fellow')),
             TRAINING_LEVEL = recode_factor( TRAINING_LEVEL, 'First year fellow'= 'First Year', 
                                                            'Second year fellow'= 'Second Year', 
                                                            'Third year fellow' = 'Third Year', 
                                                            'Advanced fellow' = "Advanced", .ordered = T))



#bartlett.test( COUNT_CORRECT ~  TRAINING_LEVEL , data = CORRECT3)
#Anova( lm( COUNT_CORRECT ~ TRAINING_LEVEL, data=CORRECT3), type=3)

#Welch's Anova - to match ggbetweenstats (unequal variances assumed in plot)
oneway.test(COUNT_CORRECT ~ TRAINING_LEVEL, data = CORRECT3, var.equal = FALSE)
## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  COUNT_CORRECT and TRAINING_LEVEL
## F = 1.3563, num df = 3.00, denom df = 56.27, p-value = 0.2655
pairs(emmeans(lm( COUNT_CORRECT ~ TRAINING_LEVEL, data=CORRECT3), spec='TRAINING_LEVEL'), adjust="BH")
##  contrast                 estimate    SE  df t.ratio p.value
##  First Year - Second Year -0.34227 0.190 231  -1.798  0.2948
##  First Year - Third Year  -0.33048 0.199 231  -1.660  0.2948
##  First Year - Advanced    -0.32278 0.345 231  -0.936  0.7002
##  Second Year - Third Year  0.01179 0.200 231   0.059  0.9825
##  Second Year - Advanced    0.01948 0.345 231   0.056  0.9825
##  Third Year - Advanced     0.00769 0.350 231   0.022  0.9825
## 
## P value adjustment: BH method for 6 tests
# Mean for First Years compared to All Other Years
contrast(emmeans(lm( COUNT_CORRECT ~ TRAINING_LEVEL, data=CORRECT3), spec='TRAINING_LEVEL'),
         list('Unadj Means First Yr vs All Others' = c( 1, -1/3, -1/3, -1/3)))
##  contrast                           estimate    SE  df t.ratio p.value
##  Unadj Means First Yr vs All Others   -0.332 0.183 231  -1.811  0.0714
ggbetweenstats( data= CORRECT3,
                x = TRAINING_LEVEL,
                y = COUNT_CORRECT,
                type="parametric",
                p.adjust.method = "BH",
                title = "Mean Scores by Training Level (4)", 
                bf.message=F)



  • Conclusion: When adjusted for multiple comparisons, the means among the various levels of the variable TRAINING_LEVEL are not statistically significant. This analysis holds true whether TRAINING_LEVEL is stratified by three levels (p=0.013) or by four levels (p=0.027). The mean is lowest among First Year trainees, but after the second year, the mean seems to peak and level off. It may be worth noting that if the mean for First Year trainees is contrasted against the mean for all other trainees combined, it is significant if TRAINING_LEVEL is a three-level variable (p=0.0416) but not significant if TRAINING_LEVEL is a four-level variable (p=0.0714).



Did subjects who self-reported having ERGO OPTIM knowlege do better on Quiz?


plot_xtab(CORRECT$COUNT_CORRECT, CORRECT$ERGO_KNOWLEDGE_SELF_REPORT, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Ergo Quiz Scores vs. Ergo Optim Knowledge v1",
          axis.titles = "Questions Answered Correctly on Quiz", 
          legend.title= "Familiar w/Ergo Optim?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(CORRECT$COUNT_CORRECT, CORRECT$ERGO_OPTIMIZATION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Ergo Quiz Scores vs. Ergo Optim Knowledge v2",
          axis.titles = "Questions Answered Correctly on Quiz", 
          legend.title= "Familiar w/Ergo Optim?",
          geom.colors = c( "#cbcccb","lightblue","#006cc5"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())





QUICK CORRELATIONS & ASSOCIATIONS


A WARNING ABOUT THE FOLLOWING CORRELATIONAL DATA


The appropriate tack here would be to run a series of “point biserial correlations,” which are correlations between a categorical data (structured as T/F, 0/1, or ordinal, such as Low-Medium-High or Freshman-Sophomore-Junior-Senior) against a continuous variable. In our data, the only quasi-continuous variable is GLOVE (glove size), so therefore, only the correlations where our demographic categorical data is compared to GLOVE (e.g., BIRTHSEX, AGEBAND, TRNG_LEVEL, HEIGHT_BAND) are statistically valid. (It can be argued that even GLOVE is not a true continuous variable since all values between 0 and infinity are not valid: Gloves are sized on the half-size, so a glove size of, say, 7.1 would never be possible, which means that this variable is really categorical in nature, akin to shoe size, dress size or hat size or street address. Glove size, like the other examples cited, may be numeric, but it not continuous.) For other categorical variables that we have coerced into a binary numeric – EVER_INJURED, TRANSIENT_PAIN_XXXX, ERGO_OPTIM – any correlations against other categorical variables would technically be statistically inappropriate to run, and should certainly never be reported. For our purposes here, these “correlations” should be used only to guide us to perform other, more appropriate statistical tests (such as logistic regression, which is a natural extension of the Chi-Square analyses run previously).



Task 1: Produce a CORRELATION MATRIX that displays the high-level associations among Sex, Race, Age, Height and Glove Size with other key variables of interest

  CORRMATRIX <-
    
    SURVEY %>%
    select (BIRTHSEX, RACE2, AGE2, TRAINING_LEVEL, PERFORMANCE_HOURS, HEIGHT2, GLOVE, EVER_INJURED, ERGO_OPTIMIZATION, TEACHER_SENSITIVITY_STATURE_HANDSIZE,
            FIRST_NAME_NO_PERMISSION, EXPERIENCED_TRANSIENT_PAIN_HAND : EXPERIENCED_TRANSIENT_PAIN_FOOT ) %>% 
    
    # na.omit()  %>% 
    
    mutate( SEX = case_when(  is.na(BIRTHSEX) ~ NA_real_,
                              BIRTHSEX == "M" ~ 0,
                              TRUE ~ 1 ) ,
            
            RACE = case_when( is.na(RACE2) ~ NA_real_,
                              RACE2 == "WHITE" ~ 0,
                              TRUE  ~ 1),
            
            AGECAT = case_when( is.na(AGE2) ~ NA_real_,
                                AGE2 == "< 30" ~ 1,
                                AGE2 == "30-34" ~ 2,
                                AGE2 == "35-40" ~3,
                                TRUE ~ 4),
            
            TRNG_LEVEL = case_when( is.na(TRAINING_LEVEL) ~ NA_real_,
                                    TRAINING_LEVEL == 'First Year' ~ 1,
                                    TRAINING_LEVEL == 'Second Year' ~ 2,
                                    TRUE  ~ 3) ,
            
            PERF_HRS = case_when( is.na(PERFORMANCE_HOURS) ~ NA_real_,
                                  PERFORMANCE_HOURS == '< 10' ~ 1,
                                  PERFORMANCE_HOURS == '10-20' ~ 2,
                                  TRUE  ~ 3) ,
            
            HEIGHT = case_when( is.na(HEIGHT2) ~ NA_real_,
                                HEIGHT2 == "< 5'" ~ 1,
                                HEIGHT2 == "5-5'3" ~ 2,
                                HEIGHT2 == "5'4-5'6" ~ 3,
                                HEIGHT2 == "5'7-5'9" ~ 4,
                                HEIGHT2 == "5'10-6'" ~ 5,
                                HEIGHT2 == "6'1-6'4" ~ 6,
                                TRUE ~ 7),
            
            HTCAT = case_when( is.na(HEIGHT2) ~ NA_real_,
                               HEIGHT2 %in% c( "< 5'" , "5-5'3" ,"5'4-5'6" ) ~ 1,
                               TRUE ~ 2),
            
            GLOVECAT = case_when( is.na(GLOVE) ~ NA_real_,
                               GLOVE >=7 ~ 1,
                               TRUE ~ 0),
            
            INJURY = case_when(  is.na(EVER_INJURED) ~ NA_real_,
                                 EVER_INJURED== "Y" ~ 1,
                                 TRUE ~ 0)  ,
            
            TPAIN =  case_when(   is.na(EXPERIENCED_TRANSIENT_PAIN_BACK) |
                                    is.na(EXPERIENCED_TRANSIENT_PAIN_HAND) |
                                    is.na(EXPERIENCED_TRANSIENT_PAIN_FOOT) |
                                    is.na(EXPERIENCED_TRANSIENT_PAIN_HAND) |
                                    is.na(EXPERIENCED_TRANSIENT_PAIN_LEG) |
                                    is.na(EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER) ~ NA_real_,
                                  
                                  EXPERIENCED_TRANSIENT_PAIN_BACK == "Y" |
                                    EXPERIENCED_TRANSIENT_PAIN_HAND == "Y" |
                                    EXPERIENCED_TRANSIENT_PAIN_FOOT == "Y" |
                                    EXPERIENCED_TRANSIENT_PAIN_HAND == "Y" |
                                    EXPERIENCED_TRANSIENT_PAIN_LEG == "Y" |
                                    EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER == "Y" ~ 1,
                                  TRUE ~ 0),
            
            
            ERGO_OPTIM =  case_when( is.na(ERGO_OPTIMIZATION) ~ NA_real_,
                                     ERGO_OPTIMIZATION == "Y" ~ 1,
                                     TRUE ~ 0),
            
            SENSITIVE_TEACHER = case_when (is.na(TEACHER_SENSITIVITY_STATURE_HANDSIZE) ~ NA_real_,
                                           TEACHER_SENSITIVITY_STATURE_HANDSIZE == "Y" ~ 1,
                                           TRUE ~ 0),
            
            FNAME_NP =  case_when (is.na(FIRST_NAME_NO_PERMISSION) ~ NA_real_,
                                   FIRST_NAME_NO_PERMISSION == "Y" ~ 1,
                                   TRUE ~ 0)) %>% 
    
    select( SEX, RACE, AGECAT, TRNG_LEVEL, PERF_HRS, HEIGHT, HTCAT, GLOVE, GLOVECAT, INJURY, TPAIN, ERGO_OPTIM, SENSITIVE_TEACHER, FNAME_NP)
  
  
  
  pairs.panels( CORRMATRIX, pch=21, stars=T,) 


There does not appear to be any association in this sample with HEIGHT or GLOVE and having sustained an injury or experienced any of the five specific transient pain types listed in the survey. Does this change if we adjust the correlations based on sex?


require(ppcor)


NEWCORRS <- 
  SURVEY %>% select( SEX=BIRTHSEX, 
                     INJURY = EVER_INJURED,
                     TP_HAND=EXPERIENCED_TRANSIENT_PAIN_HAND,
                     TP_BACK=EXPERIENCED_TRANSIENT_PAIN_BACK,
                     TP_NECKSHO=EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER,
                     TP_LEG=EXPERIENCED_TRANSIENT_PAIN_LEG,
                     TP_FOOT=EXPERIENCED_TRANSIENT_PAIN_FOOT,
                     GLOVE, HEIGHT,
                     TEACHER_SENSITIVITY = TEACHER_SENSITIVITY_STATURE_HANDSIZE) %>% 
  
  
  mutate_at(  vars(starts_with( c('TP_', 'INJURY'))),
              funs(case_when( is.na(.) ~ NA_real_,
                              . == 'Y' ~ 1,
                              TRUE ~ 0))) %>% 
  
  mutate( SEX = case_when( SEX == 'M' ~ 1,
                           TRUE ~ 0),
          
          TPAIN =  case_when(   is.na(TP_BACK) |
                                is.na(TP_HAND) |
                                is.na(TP_FOOT) |
                                is.na(TP_LEG) |
                                is.na(TP_NECKSHO) ~ NA_real_,
                                                                
                                TP_BACK == 1 |
                                TP_HAND == 1 |
                                TP_FOOT == 1 |
                                TP_LEG == 1 |
                                TP_NECKSHO == 1 ~ 1,
                                TRUE ~ 0),
          
          HEIGHT = case_when( is.na(HEIGHT) ~ NA_real_,
                              HEIGHT == "< 5'" ~ 1,
                              HEIGHT == "5-5'3" ~ 2,
                              HEIGHT == "5'4-5'6" ~ 3,
                              HEIGHT == "5'7-5'9" ~ 4,
                              HEIGHT == "5'10-6'" ~ 5,
                              HEIGHT == "6'1-6'4" ~ 6,
                              TRUE ~ 7),
          
          HTCAT = case_when( is.na(HEIGHT) ~ NA_real_,
                             HEIGHT  <= 3 ~ 0,
                             TRUE ~ 1),
          
          TEACHER_SENSITIVITY = case_when( is.na(TEACHER_SENSITIVITY) ~ NA_real_,
                                           TEACHER_SENSITIVITY == 'Y' ~ 1,
                                           TRUE ~ 0)) %>%  drop_na()



#[1]

# Pearson Correlations: INJURY with GLOVE, with and without partialling out SEX


# INJURY x GLOVE

e <-NEWCORRS %>% 
  select( GLOVE, INJURY) %>% 
  summarize_each(funs(cor.test(.,  NEWCORRS$GLOVE)$estimate)) %>%  `rownames<-`("Pearson r")
                 
p <-NEWCORRS %>% 
  select(GLOVE,  INJURY) %>% 
  summarize_each(funs(cor.test(.,  NEWCORRS$GLOVE)$p.value)) %>%  `rownames<-`("p Values")

x <-  tibble::rownames_to_column( rbind(e,p), 'STATISTIC')[-2]

x %>% 
    kbl(booktabs = T, format= "html", linesep='', padding=0, digits=3,
        align= c('l','r'), escape=F, 
        caption = "<b>Ever Sustained an Injury Correlation with <u>Hand Size</u> (Unadjusted)</b><br>",
        col.names= linebreak(c("Statistic",
                               "Ever Sustained<br>Injury?"))) %>%  
    kable_paper(html_font= "Helvetica Neue", full_width = F, "hover", 
                font_size=16) %>% 
    column_spec(1,   width = "5cm") %>% 
    column_spec(2, width= "3cm")
Ever Sustained an Injury Correlation with Hand Size (Unadjusted)
Statistic Ever Sustained
Injury?
Pearson r -0.020
p Values 0.759
# INJURY x GLOVE, controlling for Sex 

e <-NEWCORRS %>% 
  select( GLOVE, INJURY ) %>%
  summarise_each(funs(pcor.test(.,  NEWCORRS$GLOVE, NEWCORRS$SEX)$estimate)) %>%  `rownames<-`("Pearson partial r")

p <-NEWCORRS %>% 
  select(GLOVE,  INJURY) %>% 
  summarise_each(funs(pcor.test(.,  NEWCORRS$GLOVE, NEWCORRS$SEX)$p.value)) %>%  `rownames<-`("p Values")
y <-  tibble::rownames_to_column( rbind(e, p), 'STATISTIC')[-2]

y %>% 
    kbl(booktabs = T, format= "html", linesep='', padding=0, digits=3,
        align= c('l','r'), escape=F, 
        caption = "<b>Ever Sustained an Injury Correlation with <u>Hand Size</u> (Adjusted for BirthSex)</b><br>",
        col.names= linebreak(c("Statistic",
                               "Ever Sustained<br>Injury?"))) %>%  
    kable_paper(html_font= "Helvetica Neue", full_width = F, "hover", 
                font_size=16) %>% 
    column_spec(1,   width = "5cm") %>% 
    column_spec(2, width= "3cm")
Ever Sustained an Injury Correlation with Hand Size (Adjusted for BirthSex)
Statistic Ever Sustained
Injury?
Pearson partial r -0.022
p Values 0.740
#[2]

# Pearson Correlations: INJURY with HEIGHT, with and without partialling out SEX


# INJURY x HEIGHT

e <-NEWCORRS %>% 
  select( HEIGHT, INJURY) %>% 
  summarize_each(funs(cor.test(.,  NEWCORRS$HEIGHT)$estimate)) %>%  `rownames<-`("Pearson r")
                 
p <-NEWCORRS %>% 
  select(HEIGHT,  INJURY) %>% 
  summarize_each(funs(cor.test(.,  NEWCORRS$HEIGHT)$p.value)) %>%  `rownames<-`("p Values")

x <-  tibble::rownames_to_column( rbind(e,p), 'STATISTIC')[-2]

x %>% 
    kbl(booktabs = T, format= "html", linesep='', padding=0, digits=3,
        align= c('l','r'), escape=F, 
        caption = "<b>Ever Sustained an Injury Correlation with <u>Height Band</u> (Unadjusted)</b><br>",
        col.names= linebreak(c("Statistic",
                               "Ever Sustained<br>Injury?"))) %>%  
    kable_paper(html_font= "Helvetica Neue", full_width = F, "hover", 
                font_size=16) %>% 
    column_spec(1,   width = "5cm") %>% 
    column_spec(2, width= "3cm")
Ever Sustained an Injury Correlation with Height Band (Unadjusted)
Statistic Ever Sustained
Injury?
Pearson r 0.037
p Values 0.579
# INJURY x HEIGHT, controlling for Sex 

e <-NEWCORRS %>% 
  select( HEIGHT, INJURY ) %>%
  summarise_each(funs(pcor.test(.,  NEWCORRS$HEIGHT, NEWCORRS$SEX)$estimate)) %>%  `rownames<-`("Pearson partial r")

p <-NEWCORRS %>% 
  select(HEIGHT,  INJURY) %>% 
  summarise_each(funs(pcor.test(.,  NEWCORRS$HEIGHT, NEWCORRS$SEX)$p.value)) %>%  `rownames<-`("p Values")
y <-  tibble::rownames_to_column( rbind(e, p), 'STATISTIC')[-2]

y %>% 
    kbl(booktabs = T, format= "html", linesep='', padding=0, digits=3,
        align= c('l','r'), escape=F, 
        caption = "<b>Ever Sustained an Injury Correlation with <u>Height Band</u> (Adjusted for BirthSex)</b><br>",
        col.names= linebreak(c("Statistic",
                               "Ever Sustained<br>Injury?"))) %>%  
    kable_paper(html_font= "Helvetica Neue", full_width = F, "hover", 
                font_size=16) %>% 
    column_spec(1,   width = "5cm") %>% 
    column_spec(2, width= "3cm")
Ever Sustained an Injury Correlation with Height Band (Adjusted for BirthSex)
Statistic Ever Sustained
Injury?
Pearson partial r 0.060
p Values 0.367
  # [3] 
  
  #Transient Pain Correlations by Glove Size
  
  e <-NEWCORRS %>% 
    select( GLOVE, starts_with("TP_")) %>% 
    summarise_each(funs(cor.test(.,  NEWCORRS$GLOVE)$estimate)) %>%  `rownames<-`("Pearson r")
  
  p <-NEWCORRS %>% 
    select(GLOVE,  starts_with("TP_")) %>% 
    summarise_each(funs(cor.test(.,  NEWCORRS$GLOVE)$p.value)) %>%  `rownames<-`("p Values")
  x <-  tibble::rownames_to_column( rbind(e,p), 'STATISTIC')[, -2]
  
  
  x %>%    
    kbl(booktabs = T, format= "html", linesep='', padding=0, digits=3,
        align= c('l','r','r','r','r', 'r'), escape=F, 
        caption = "<b>Transient Pain Correlations with <u>Hand Size</u> (Unadjusted)</b><br>",
        col.names= linebreak(c("Statistic",
                               "TP Hand",
                               "TP Back",
                               "TP Neck/Sho",
                               "TP Leg",
                               "TP Foot"))) %>%  
    kable_paper(html_font= "Helvetica Neue", full_width = F, "hover", 
                font_size=16) %>% 
    column_spec(1,   width = "5cm") %>% 
    column_spec(2:6, width= "3cm")
Transient Pain Correlations with Hand Size (Unadjusted)
Statistic TP Hand TP Back TP Neck/Sho TP Leg TP Foot
Pearson r -0.098 0.037 -0.210 -0.042 -0.109
p Values 0.141 0.574 0.001 0.530 0.101
        # Transient Pain Partial Correlations by GLOVE, controlling for Sex 
  e <-NEWCORRS %>% 
    select( GLOVE,  starts_with("TP_")) %>% 
    summarise_each(funs(pcor.test(.,  NEWCORRS$GLOVE, NEWCORRS$SEX)$estimate)) %>%  `rownames<-`("Pearson partial r")
  
  p <-NEWCORRS %>% 
    select(GLOVE,  starts_with("TP_")) %>% 
    summarise_each(funs(pcor.test(.,  NEWCORRS$GLOVE, NEWCORRS$SEX)$p.value)) %>%  `rownames<-`("p Values")
  y <-  tibble::rownames_to_column( rbind(e, p), 'STATISTIC')[, -2]
  
  y  %>% 
    kbl(booktabs = T, format= "html", linesep='', padding=0, digits=3,
        align= c('l','r','r','r','r', 'r'), escape=F, 
        caption = "<b>Transient Pain Correlations with <u>Hand Size</u> (Adjusted for Birthsex)</b><br>",
        col.names= linebreak(c("Statistic",
                               "TP Hand",
                               "TP Back",
                               "TP Neck/Sho",
                               "TP Leg",
                               "TP Foot"))) %>%  
    kable_paper(html_font= "Helvetica Neue", full_width = F, "hover", 
                font_size=16) %>% 
    column_spec(1,   width = "5cm") %>% 
    column_spec(2:6, width= "3cm")
Transient Pain Correlations with Hand Size (Adjusted for Birthsex)
Statistic TP Hand TP Back TP Neck/Sho TP Leg TP Foot
Pearson partial r -0.059 0.130 -0.019 -0.041 -0.010
p Values 0.377 0.051 0.773 0.543 0.885
  # [4]
  
  # Transient Pain Correlations by Height
  
  e <-NEWCORRS %>% 
    select( HEIGHT, starts_with("TP_")) %>% 
    summarise_each(funs(cor.test(.,  NEWCORRS$HEIGHT)$estimate)) %>%  `rownames<-`("Pearson r")
  
  p <-NEWCORRS %>% 
    select(HEIGHT,  starts_with("TP_")) %>% 
    summarise_each(funs(cor.test(.,  NEWCORRS$HEIGHT)$p.value)) %>%  `rownames<-`("p Values")
  x <-  tibble::rownames_to_column( rbind(e,p), 'STATISTIC')[, -2]
  x %>%    
    kbl(booktabs = T, format= "html", linesep='', padding=0, digits=3,
                align= c('l','r','r','r','r', 'r'), escape=F, 
                caption = "<b>Transient Pain Correlations with <u>Height</u> (Unadjusted)</b><br>",
                col.names= linebreak(c("Statistic",
                                       "TP Hand",
                                       "TP Back",
                                       "TP Neck/Sho",
                                       "TP Leg",
                                       "TP Foot"))) %>%  
    kable_paper(html_font= "Helvetica Neue", full_width = F, "hover", 
                font_size=16) %>% 
    column_spec(1,   width = "5cm") %>% 
    column_spec(2:6, width= "3cm")
Transient Pain Correlations with Height (Unadjusted)
Statistic TP Hand TP Back TP Neck/Sho TP Leg TP Foot
Pearson r -0.059 0.037 -0.153 -0.045 -0.145
p Values 0.371 0.583 0.020 0.497 0.029
       # Transient Pain Partial Correlations by Height, controlling for Sex 
  e <-NEWCORRS %>% 
    select( HEIGHT,  starts_with("TP_")) %>% 
    summarise_each(funs(pcor.test(.,  NEWCORRS$HEIGHT, NEWCORRS$SEX)$estimate)) %>%  `rownames<-`("Pearson partial r")
  
  p <-NEWCORRS %>% 
    select(HEIGHT,  starts_with("TP_")) %>% 
    summarise_each(funs(pcor.test(.,  NEWCORRS$HEIGHT, NEWCORRS$SEX)$p.value)) %>%  `rownames<-`("p Values")
  y <-  tibble::rownames_to_column( rbind(e, p), 'STATISTIC')[, -2]
  
  y  %>% 
    kbl(booktabs = T, format= "html", linesep='', padding=0, digits=3,
         align= c('l','r','r','r','r', 'r'), escape=F, 
        caption = "<b>Transient Pain Correlations with <u>Height</u> (Adjusted for Birthsex)</b><br>",
         col.names= linebreak(c("Statistic",
                                "TP Hand",
                                "TP Back",
                                "TP Neck/Sho",
                                "TP Leg",
                                "TP Foot"))) %>%  
    kable_paper(html_font= "Helvetica Neue", full_width = F, "hover", 
                font_size=16) %>% 
    column_spec(1,   width = "5cm") %>% 
    column_spec(2:6, width= "3cm")
Transient Pain Correlations with Height (Adjusted for Birthsex)
Statistic TP Hand TP Back TP Neck/Sho TP Leg TP Foot
Pearson partial r -0.006 0.111 0.042 -0.043 -0.072
p Values 0.926 0.095 0.532 0.516 0.283


Task 2: Are Ergonomic Difficulties reported among respondents who were PREGNANT during their fellowship associated at all with GLOVE or HEIGHT ?

  • Total N for subjects who were pregnant during fellowship = 23
  • Only one (1) subject reported an Ergonomic Injury during her fellowship, so meaningful analysis along this metric could not be performed


plot_xtab(SURVEY$HEIGHT[SURVEY$FELLOWSHIP_PREGNANCY=='Y'],
          SURVEY$PREGNANCY_ERGO_DIFFICULTY[SURVEY$FELLOWSHIP_PREGNANCY=='Y'], 
          margin = "row", 
          bar.pos = "stack", coord.flip = F, 
          title = "Pregnant During Fellowship - Height Band x Ergo Difficulty",
          axis.titles = c('Height Band'), 
          legend.title= "Ergo Difficulty?",
          geom.colors = c("#006cc5", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())
## Warning in stats::chisq.test(ftab): Chi-squared approximation may be incorrect

plot_xtab(SURVEY$GLOVE[SURVEY$FELLOWSHIP_PREGNANCY=='Y'],
          SURVEY$PREGNANCY_ERGO_DIFFICULTY[SURVEY$FELLOWSHIP_PREGNANCY=='Y'], 
          margin = "row", 
          bar.pos = "stack", coord.flip = F, 
          title = "Pregnant During Fellowship - Glove x Ergo Difficulty",
          axis.titles = c('Glove/Hand Size'), 
          legend.title= "Ergo Difficulty?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())
## Warning in stats::chisq.test(ftab): Chi-squared approximation may be incorrect

#Ergonomic Difficulty during Pregnancy?

CrossTable(SURVEY$PREGNANCY_ERGO_DIFFICULTY, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=F, prop.t=F, chisq=F, fisher=F)  
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  23 
## 
##  
##                                  | SURVEY$BIRTHSEX 
## SURVEY$PREGNANCY_ERGO_DIFFICULTY |         F |         M | Row Total | 
## ---------------------------------|-----------|-----------|-----------|
##                                N |         9 |         0 |         9 | 
##                                  |     0.391 |       NaN |           | 
## ---------------------------------|-----------|-----------|-----------|
##                                Y |        14 |         0 |        14 | 
##                                  |     0.609 |       NaN |           | 
## ---------------------------------|-----------|-----------|-----------|
##                     Column Total |        23 |         0 |        23 | 
##                                  |     1.000 |     0.000 |           | 
## ---------------------------------|-----------|-----------|-----------|
## 
## 
CrossTable(SURVEY$PREGNANCY_ERGO_INJURY, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=F, prop.t=F, chisq=F, fisher=F) 
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  22 
## 
##  
##                              | SURVEY$BIRTHSEX 
## SURVEY$PREGNANCY_ERGO_INJURY |         F |         M | Row Total | 
## -----------------------------|-----------|-----------|-----------|
##                            N |        21 |         0 |        21 | 
##                              |     0.955 |       NaN |           | 
## -----------------------------|-----------|-----------|-----------|
##                            Y |         1 |         0 |         1 | 
##                              |     0.045 |       NaN |           | 
## -----------------------------|-----------|-----------|-----------|
##                 Column Total |        22 |         0 |        22 | 
##                              |     1.000 |     0.000 |           | 
## -----------------------------|-----------|-----------|-----------|
## 
## 
CrossTable(SURVEY$POSTPARTUM_ERGO_INJURY, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=F, prop.t=F, chisq=F, fisher=F) 
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  20 
## 
##  
##                               | SURVEY$BIRTHSEX 
## SURVEY$POSTPARTUM_ERGO_INJURY |         F |         M | Row Total | 
## ------------------------------|-----------|-----------|-----------|
##                             N |        19 |         0 |        19 | 
##                               |     0.950 |       NaN |           | 
## ------------------------------|-----------|-----------|-----------|
##                             Y |         1 |         0 |         1 | 
##                               |     0.050 |       NaN |           | 
## ------------------------------|-----------|-----------|-----------|
##                  Column Total |        20 |         0 |        20 | 
##                               |     1.000 |     0.000 |           | 
## ------------------------------|-----------|-----------|-----------|
## 
## 
# Did pregnant women report egonomic difficulties at a rate higher than women who were never pregnant during their fellowship

PREGCOMP <-

  SURVEY %>% 
    filter( BIRTHSEX == 'F') %>% 
    
    mutate( ERGO_DIFFICULTY = case_when( FELLOWSHIP_PREGNANCY == "Y" & PREGNANCY_ERGO_DIFFICULTY== "Y" ~ "Y",
                                         FELLOWSHIP_PREGNANCY == "N" & ERGO_OPTIMIZATION == "N" ~ "Y",
                                         TRUE ~ "N"
                                         )) %>% 
    select( BIRTHSEX, FELLOWSHIP_PREGNANCY, PREGNANCY_ERGO_DIFFICULTY, ERGO_DIFFICULTY, GLOVE, HEIGHT, TRAINING_LEVEL)


CrossTable(PREGCOMP$ERGO_DIFFICULTY, PREGCOMP$FELLOWSHIP_PREGNANCY, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T) 
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  112 
## 
##  
##                          | PREGCOMP$FELLOWSHIP_PREGNANCY 
## PREGCOMP$ERGO_DIFFICULTY |         N |         Y | Row Total | 
## -------------------------|-----------|-----------|-----------|
##                        N |        44 |         9 |        53 | 
##                          |     0.830 |     0.170 |     0.473 | 
##                          |     0.494 |     0.391 |           | 
## -------------------------|-----------|-----------|-----------|
##                        Y |        45 |        14 |        59 | 
##                          |     0.763 |     0.237 |     0.527 | 
##                          |     0.506 |     0.609 |           | 
## -------------------------|-----------|-----------|-----------|
##             Column Total |        89 |        23 |       112 | 
##                          |     0.795 |     0.205 |           | 
## -------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  0.7789996     d.f. =  1     p =  0.3774473 
## 
## Pearson's Chi-squared test with Yates' continuity correction 
## ------------------------------------------------------------
## Chi^2 =  0.4203739     d.f. =  1     p =  0.5167506 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Sample estimate odds ratio:  1.515329 
## 
## Alternative hypothesis: true odds ratio is not equal to 1
## p =  0.4836125 
## 95% confidence interval:  0.5451189 4.410417 
## 
## Alternative hypothesis: true odds ratio is less than 1
## p =  0.8681928 
## 95% confidence interval:  0 3.757015 
## 
## Alternative hypothesis: true odds ratio is greater than 1
## p =  0.2591851 
## 95% confidence interval:  0.630619 Inf 
## 
## 
## 
#Correlation Matrix to view "associations" with Glove Size and Height


PREGNANCY  <-
  
  SURVEY %>%
  filter( FELLOWSHIP_PREGNANCY == "Y") %>% 
  select (BIRTHSEX, RACE2, AGE2, TRAINING_LEVEL, HEIGHT2, GLOVE, PREGNANCY_ERGO_DIFFICULTY ) %>% 
  
  mutate( SEX = case_when(  is.na(BIRTHSEX) ~ NA_real_,
                            BIRTHSEX == "M" ~ 0,
                            TRUE ~ 1 ) ,
          
          RACE = case_when( is.na(RACE2) ~ NA_real_,
                            RACE2 == "WHITE" ~ 0,
                            TRUE  ~ 1),
          
          AGECAT = case_when( is.na(AGE2) ~ NA_real_,
                              AGE2 == "< 30" ~ 1,
                              AGE2 == "30-34" ~ 2,
                              AGE2 == "35-40" ~3,
                              TRUE ~ 4),
          
          TRNG_LEVEL = case_when( is.na(TRAINING_LEVEL) ~ NA_real_,
                                  TRAINING_LEVEL == 'First Year' ~ 1,
                                  TRAINING_LEVEL == 'Second Year' ~ 2,
                                  TRAINING_LEVEL == 'Advanced' ~ 3,
                                  TRUE  ~ 3) ,
          
          HEIGHT = case_when( is.na(HEIGHT2) ~ NA_real_,
                              HEIGHT2 == "< 5" ~ 1,
                              HEIGHT2 == "5-5'3" ~ 2,
                              HEIGHT2 == "5'4-5'6" ~ 3,
                              HEIGHT2 == "5'7-5'9" ~ 4,
                              HEIGHT2 == "5'10-6'" ~ 5,
                              HEIGHT2 == "6'1-6'4" ~ 6,
                              TRUE ~ 7),
          
          ERGO_DIFF = case_when( is.na(PREGNANCY_ERGO_DIFFICULTY) ~ NA_real_,
                                 PREGNANCY_ERGO_DIFFICULTY == 'N' ~ 0,
                                  TRUE  ~ 1) ) %>% 
  select( SEX, RACE, AGECAT, TRNG_LEVEL, ERGO_DIFF, GLOVE, HEIGHT)


pairs.panels( PREGNANCY[c(-1,-2)] , pch=21, stars=T) 


Task 3: How are the associations/correlations for certain variables affected if we statify them by Training Level?

  • Ergonomic Optimizaiton
  • Injury
  • Transient Pain
  • Sensitive Teachers
  • First Name No Permission


require(ggcorrplot)
## Loading required package: ggcorrplot
#Paneled Correlation Matrices for each level of "Training Level" (4)


CORRMATRIX %>%
  mutate(TRNG_LEVEL = factor(TRNG_LEVEL),
         TRNG_LEVEL = recode_factor(TRNG_LEVEL, '1'= 'First Yr', '2'= "Second Yr", '3'= "Third Yr", '4'="Advanced", .ordered=T)) %>%
  select(TRNG_LEVEL, ERGO_OPTIM, INJURY, TPAIN, SENSITIVE_TEACHER, FNAME_NP) %>%
  
  grouped_ggcorrmat(
    ## arguments relevant for `ggcorrmat`
    data = . ,
    
    grouping.var = TRNG_LEVEL,
    ## arguments relevant for `combine_plots`
    plotgrid.args = list(nrow = 2),
    annotation.args = list(
      tag_levels = "1",
      title = "Relationship among Key Variables across Training Level"
    ),
    p.adjust.method = "none"
  )

There apprears to be a few slightly significant “correlations” that might be worth exploring with simple logistic regression:
  • First Year: Transient Pain with Sensitive Teacher & wth Ergo Optimization (based on borderline p-value, not displayed)
  • Second Year: First Name & Ergo Optimization (???)
  • Advanced: Transient Pain & Sensitive Teacher


Let’s explore further with few unadjusted/adjusted regressions where Transient Pain is the outcome and Ergo Optimization is the outcome:



  • 1. Does Ergonomic Optimization influence reports of Transient Pain, with and without adjusting for Training Level?


require(car)

TLEVEL <-
CORRMATRIX %>%
  mutate(TRNG_LEVEL = factor(TRNG_LEVEL),
         TRNG_LEVEL = recode_factor(TRNG_LEVEL, '1'= 'First Yr', '2'= "Second Yr", '3'= "Advanced", '4'="Advanced", .ordered=F),
         TRNG_LEVEL =  relevel(TRNG_LEVEL, ref= "Advanced")) %>%
  select(TRNG_LEVEL, ERGO_OPTIM, INJURY, TPAIN, SENSITIVE_TEACHER, FNAME_NP)
  

      tpain.eo.glm.unadj <- glm( TPAIN ~ ERGO_OPTIM, data= TLEVEL, family= binomial(link="logit"))
      tpain.eo.emmeans.unadj <- emmeans( tpain.eo.glm.unadj,  ~ c(ERGO_OPTIM), type= "response")
      tpain.eo.glm.adj   <- glm( TPAIN ~ ERGO_OPTIM + TRNG_LEVEL, data= TLEVEL, family= binomial(link="logit"))
      tpain.eo.emmeans.adj <- emmeans( tpain.eo.glm.adj,  ~ c(ERGO_OPTIM), type= "response")
      
      
      summary(tpain.eo.glm.unadj)
## 
## Call:
## glm(formula = TPAIN ~ ERGO_OPTIM, family = binomial(link = "logit"), 
##     data = TLEVEL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3678   0.3536   0.3536   0.4173   0.6085  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.7408     0.3263   8.401   <2e-16 ***
## ERGO_OPTIM   -1.1482     0.4547  -2.525   0.0116 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 146.29  on 235  degrees of freedom
## Residual deviance: 139.96  on 234  degrees of freedom
## AIC: 143.96
## 
## Number of Fisher Scoring iterations: 5
      contrast(tpain.eo.emmeans.unadj, list("Unadj OR: Ergo Optim =Y on Transient Pain =Y" = c(-1, 1)))
##  contrast                                     odds.ratio    SE  df null z.ratio
##  Unadj OR: Ergo Optim =Y on Transient Pain =Y      0.317 0.144 Inf    1  -2.525
##  p.value
##   0.0116
## 
## Tests are performed on the log odds ratio scale
      contrast(tpain.eo.emmeans.unadj, list("Unadj OR: Ergo Optim =N on Transient Pain =Y" = c(1, -1)))
##  contrast                                     odds.ratio   SE  df null z.ratio
##  Unadj OR: Ergo Optim =N on Transient Pain =Y       3.15 1.43 Inf    1   2.525
##  p.value
##   0.0116
## 
## Tests are performed on the log odds ratio scale
      summary(tpain.eo.glm.adj)
## 
## Call:
## glm(formula = TPAIN ~ ERGO_OPTIM + TRNG_LEVEL, family = binomial(link = "logit"), 
##     data = TLEVEL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5146   0.2942   0.3555   0.4021   0.6940  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           2.7299     0.4628   5.898 3.68e-09 ***
## ERGO_OPTIM           -1.1737     0.4589  -2.558   0.0105 *  
## TRNG_LEVELFirst Yr   -0.2555     0.5261  -0.486   0.6272    
## TRNG_LEVELSecond Yr   0.3884     0.6031   0.644   0.5195    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 146.09  on 234  degrees of freedom
## Residual deviance: 138.35  on 231  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 146.35
## 
## Number of Fisher Scoring iterations: 5
      #Before running OR test, determine whether the multilevel categorical variable TRNG_LEVEL is significant overall?
      aod::wald.test(b = coef(tpain.eo.glm.adj), Sigma = vcov(tpain.eo.glm.adj), Terms = 3:4)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 1.2, df = 2, P(> X2) = 0.55
      contrast(tpain.eo.emmeans.adj, list("Adj OR: Ergo Optim =N on Transient Pain =Y" = c(1, -1)))
##  contrast                                   odds.ratio   SE  df null z.ratio
##  Adj OR: Ergo Optim =N on Transient Pain =Y       3.23 1.48 Inf    1   2.558
##  p.value
##   0.0105
## 
## Results are averaged over the levels of: TRNG_LEVEL 
## Tests are performed on the log odds ratio scale


  • Conclusion: Ergonomic Optimization is a significant effect before and after adjust for Training Level, which itself is not a significant effect. (The Odds Ratio in the adjusted model rose from 3.15 to 3.23, p= 0.0105. In other words, subjects who reported that their Endoscopy Suite environment was not ergonomically optimized were 3.23 times more likely to also report having experienced Transient Pain in one of the several body parts highlghted in the survey.)



  • 2. Does having a Sensitive Teacher with concern for your size/stature influence reports of Transient Pain, with and without adjusting for Training Level?


    tpain.steach.glm.unadj <- glm( TPAIN ~ SENSITIVE_TEACHER, data= TLEVEL, family= binomial(link="logit"))
    tpain.steach.emmeans.unadj <- emmeans( tpain.steach.glm.unadj,  ~ c(SENSITIVE_TEACHER), type= "response")
    tpain.steach.glm.adj   <- glm( TPAIN ~ SENSITIVE_TEACHER + TRNG_LEVEL, data= TLEVEL, family= binomial(link="logit"))
    tpain.steach.emmeans.adj <- emmeans( tpain.steach.glm.adj,  ~ c(SENSITIVE_TEACHER), type= "response")
    
    
    summary(tpain.steach.glm.unadj)
## 
## Call:
## glm(formula = TPAIN ~ SENSITIVE_TEACHER, family = binomial(link = "logit"), 
##     data = TLEVEL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3126   0.3780   0.3780   0.5026   0.5026  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         2.6027     0.3664   7.103 1.22e-12 ***
## SENSITIVE_TEACHER  -0.5974     0.4640  -1.287    0.198    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 145.89  on 233  degrees of freedom
## Residual deviance: 144.18  on 232  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 148.18
## 
## Number of Fisher Scoring iterations: 5
    contrast(tpain.steach.emmeans.unadj, list("Unadj OR: Sensitive Teach =Y on Transient Pain =Y" = c(-1, 1)))
##  contrast                                          odds.ratio    SE  df null
##  Unadj OR: Sensitive Teach =Y on Transient Pain =Y       0.55 0.255 Inf    1
##  z.ratio p.value
##   -1.287  0.1980
## 
## Tests are performed on the log odds ratio scale
    contrast(tpain.steach.emmeans.unadj, list("Unadj OR: Sensitive Teach =N on Transient Pain =Y" = c(1, -1)))
##  contrast                                          odds.ratio    SE  df null
##  Unadj OR: Sensitive Teach =N on Transient Pain =Y       1.82 0.843 Inf    1
##  z.ratio p.value
##    1.287  0.1980
## 
## Tests are performed on the log odds ratio scale
    summary(tpain.steach.glm.adj)
## 
## Call:
## glm(formula = TPAIN ~ SENSITIVE_TEACHER + TRNG_LEVEL, family = binomial(link = "logit"), 
##     data = TLEVEL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4555   0.3172   0.4189   0.5201   0.5552  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           2.5071     0.4726   5.305 1.13e-07 ***
## SENSITIVE_TEACHER    -0.5751     0.4659  -1.234    0.217    
## TRNG_LEVELFirst Yr   -0.1402     0.5169  -0.271    0.786    
## TRNG_LEVELSecond Yr   0.4573     0.5962   0.767    0.443    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 145.69  on 232  degrees of freedom
## Residual deviance: 142.78  on 229  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 150.78
## 
## Number of Fisher Scoring iterations: 5
    #Before running OR test, determine whether the multilevel categorical variable TRNG_LEVEL is significant overall?
    aod::wald.test(b = coef(tpain.steach.glm.adj), Sigma = vcov(tpain.steach.glm.adj), Terms = 3:4)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 1.1, df = 2, P(> X2) = 0.59
    contrast(tpain.steach.emmeans.adj, list("Adj OR: Sensitive Teach =N on Transient Pain =Y" = c(1, -1)))
##  contrast                                        odds.ratio    SE  df null
##  Adj OR: Sensitive Teach =N on Transient Pain =Y       1.78 0.828 Inf    1
##  z.ratio p.value
##    1.234  0.2171
## 
## Results are averaged over the levels of: TRNG_LEVEL 
## Tests are performed on the log odds ratio scale
    confint(contrast(tpain.steach.emmeans.adj, list("Adj OR: Sensitive Teach =N on Transient Pain =Y" = c(1, -1))))
##  contrast                                        odds.ratio    SE  df asymp.LCL
##  Adj OR: Sensitive Teach =N on Transient Pain =Y       1.78 0.828 Inf     0.713
##  asymp.UCL
##       4.43
## 
## Results are averaged over the levels of: TRNG_LEVEL 
## Confidence level used: 0.95 
## Intervals are back-transformed from the log odds ratio scale


  • Conclusion: Sensitive Teacher is not a significant effect before (p=0.198) or after adjust for Training Level (p=0.217), which itself is not a significant effect. (In the adjusted model, the odds of experiencing Transient Pain if one did not have a Sensitive Teacher was 1.78, but with an insignificant p-value, cl=[0.713, 4.43])



  • 3. Does having a Sensitive Teacher with concern for your size/stature influence the use/encouragement of Ergonomic Optimization, with and without adjusting for Training Level?


    eo.steach.glm.unadj <- glm( ERGO_OPTIM ~ SENSITIVE_TEACHER, data= TLEVEL, family= binomial(link="logit"))
    eo.steach.emmeans.unadj <- emmeans( eo.steach.glm.unadj,  ~ c(SENSITIVE_TEACHER), type= "response")
    eo.steach.glm.adj   <- glm( ERGO_OPTIM ~ SENSITIVE_TEACHER + TRNG_LEVEL, data= TLEVEL, family= binomial(link="logit"))
    eo.steach.emmeans.adj <- emmeans( eo.steach.glm.adj,  ~ c(SENSITIVE_TEACHER), type= "response")
    
    
    summary(eo.steach.glm.unadj)
## 
## Call:
## glm(formula = ERGO_OPTIM ~ SENSITIVE_TEACHER, family = binomial(link = "logit"), 
##     data = TLEVEL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0080  -1.0080  -0.6809   1.3569   1.7751  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -1.3437     0.2292  -5.863 4.56e-09 ***
## SENSITIVE_TEACHER   0.9312     0.2965   3.141  0.00168 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 287.23  on 233  degrees of freedom
## Residual deviance: 276.94  on 232  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 280.94
## 
## Number of Fisher Scoring iterations: 4
    contrast(eo.steach.emmeans.unadj, list("Unadj OR: Sensitive Teach =Y on Ergo Optim =Y" = c(-1, 1)))
##  contrast                                      odds.ratio    SE  df null
##  Unadj OR: Sensitive Teach =Y on Ergo Optim =Y       2.54 0.752 Inf    1
##  z.ratio p.value
##    3.141  0.0017
## 
## Tests are performed on the log odds ratio scale
    contrast(eo.steach.emmeans.unadj, list("Unadj OR: Sensitive Teach =N on Ergo Optim =Y" = c(1, -1)))
##  contrast                                      odds.ratio    SE  df null
##  Unadj OR: Sensitive Teach =N on Ergo Optim =Y      0.394 0.117 Inf    1
##  z.ratio p.value
##   -3.141  0.0017
## 
## Tests are performed on the log odds ratio scale
    summary(eo.steach.glm.adj)
## 
## Call:
## glm(formula = ERGO_OPTIM ~ SENSITIVE_TEACHER + TRNG_LEVEL, family = binomial(link = "logit"), 
##     data = TLEVEL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1032  -0.9311  -0.6504   1.2535   1.8582  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -1.0963     0.2961  -3.703 0.000213 ***
## SENSITIVE_TEACHER     0.9191     0.2993   3.071 0.002134 ** 
## TRNG_LEVELFirst Yr   -0.4342     0.3558  -1.221 0.222222    
## TRNG_LEVELSecond Yr  -0.3497     0.3554  -0.984 0.325109    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 284.83  on 232  degrees of freedom
## Residual deviance: 273.38  on 229  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 281.38
## 
## Number of Fisher Scoring iterations: 4
    #Before running OR test, determine whether the multilevel categorical variable TRNG_LEVEL is significant overall?
    aod::wald.test(b = coef(eo.steach.glm.adj), Sigma = vcov(eo.steach.glm.adj), Terms = 3:4)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 1.7, df = 2, P(> X2) = 0.42
    contrast(eo.steach.emmeans.adj, list("Adj OR: Sensitive Teach =Y on Ergo Optim =Y" = c(-1, 1)))
##  contrast                                    odds.ratio   SE  df null z.ratio
##  Adj OR: Sensitive Teach =Y on Ergo Optim =Y       2.51 0.75 Inf    1   3.071
##  p.value
##   0.0021
## 
## Results are averaged over the levels of: TRNG_LEVEL 
## Tests are performed on the log odds ratio scale


  • Conclusion: Sensitive Teacher is a highly significant effect before, and an extremely highly significant effect after adjusting for Training Level. Training Level itself is not a significant effect. The model summary shows that there is no significance between the two levels of TRNG_LEVEL when compared to the reference category, “Advanced.”) Beta for ST decreased slightly from +0.9312 to +0.9191, meaning that as Teacher Sensitivity increases, reports of Ergonomic Optimization also rise. (The Odds Ratio in the adjusted model remained relatively flat at 2.54 to 2.51, p= 0.0021. In other words, subjects who reported that their Endoscopy Suite environment was ergonomically optimized were 2.52 times more likely to have had instructors who were sensitive to their physical size/stature.)

  • When compared to the reference category (“Advanced”), ALL other training levels performed better (i.e., lower Beta weights), although not statistically so. Still, could this suggest that teachers are more likely to be sensitive to ergonomics when dealing with older trainees, or does it suggest that younger trainees don’t really know what proper ergonomics should entail and, therefore, don’t know how to rate their instructors on this metric? (Does it take rising to the Advanced level before a trainee fully understands what constitutes “proper ergonomics?”)



  • 4. First Name-No Permission does not appear to be correlated with Sex, but there does appear to be some correlation to Height Band. Is it significant?


  # Since height is a multilevel categorical variable, use the median Male height as the reference, 5, or 5'10-6'
  



FNAME <-
    CORRMATRIX %>% 
    select( SEX, AGECAT, RACE, HEIGHT, TRNG_LEVEL, FNAME_NP) %>% 
    mutate( SEX = factor(SEX),
            RACE = factor(RACE),
            HEIGHT = factor(HEIGHT),
            HEIGHT = relevel(HEIGHT, ref="5"),
            HTCAT = factor(HEIGHT),
            HTCAT = recode_factor(HEIGHT, '1'='0', '2'='0', '3'='0', .default = '1'),
            HTCAT = relevel(HTCAT, ref='1'),
            AGECAT = recode_factor(AGECAT, '1'='1', '2'='1', '3'='2', '4'='2'),
            AGECAT = relevel(AGECAT, ref="2"),
            
            TRNG_LEVEL = factor(TRNG_LEVEL),
            TRNG_LEVEL = relevel(TRNG_LEVEL, ref="3"))

    levels(FNAME$HEIGHT)
## [1] "5" "1" "2" "3" "4" "6" "7"
    levels(FNAME$AGECAT)
## [1] "2" "1"
    fname.height.glm.unadj <- glm( FNAME_NP ~ HEIGHT, data= FNAME, family= binomial(link="logit"))
    fname.height.emmeans.unadj <- emmeans( fname.height.glm.unadj,  ~ c(HEIGHT), type= "response")
    fname.height.glm.adj   <- glm( FNAME_NP ~ HEIGHT + SEX , data= FNAME, family= binomial(link="logit"))
    fname.height.emmeans.adj <- emmeans( fname.height.glm.adj,  ~ c(HEIGHT, SEX), type= "response")
    
    
    summary(fname.height.glm.unadj) 
## 
## Call:
## glm(formula = FNAME_NP ~ HEIGHT, family = binomial(link = "logit"), 
##     data = FNAME)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1774  -0.8806  -0.7090   1.3349   2.0184  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.2528     0.3586  -3.494 0.000476 ***
## HEIGHT1       1.2528     1.4590   0.859 0.390524    
## HEIGHT2       0.8899     0.4843   1.837 0.066151 .  
## HEIGHT3       0.5055     0.4587   1.102 0.270446    
## HEIGHT4       0.1335     0.4599   0.290 0.771534    
## HEIGHT6      -0.6444     0.7155  -0.901 0.367800    
## HEIGHT7       1.2528     1.4590   0.859 0.390524    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 275.21  on 231  degrees of freedom
## Residual deviance: 266.71  on 225  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 280.71
## 
## Number of Fisher Scoring iterations: 4
    contrast(fname.height.emmeans.unadj, list("Unadj OR: Height1 (< 5') on FNAME_NP =Y relative to 5'10-6'" =   c(-1, 1, 0, 0, 0, 0, 0)))
##  contrast                                                    odds.ratio   SE
##  Unadj OR: Height1 (< 5') on FNAME_NP =Y relative to 5'10-6'        3.5 5.11
##   df null z.ratio p.value
##  Inf    1   0.859  0.3905
## 
## Tests are performed on the log odds ratio scale
    contrast(fname.height.emmeans.unadj, list("Unadj OR: Height2 (5-5'3) on FNAME_NP =Y relative to 5'10-6'" =   c(-1, 0, 1, 0, 0, 0, 0)))
##  contrast                                                     odds.ratio   SE
##  Unadj OR: Height2 (5-5'3) on FNAME_NP =Y relative to 5'10-6'       2.43 1.18
##   df null z.ratio p.value
##  Inf    1   1.837  0.0662
## 
## Tests are performed on the log odds ratio scale
    contrast(fname.height.emmeans.unadj, list("Unadj OR: Height3 (5'4-5'6) on FNAME_NP =Y relative to 5'10-6'" = c(-1, 0, 0, 1, 0, 0, 0)))
##  contrast                                                       odds.ratio
##  Unadj OR: Height3 (5'4-5'6) on FNAME_NP =Y relative to 5'10-6'       1.66
##     SE  df null z.ratio p.value
##  0.761 Inf    1   1.102  0.2704
## 
## Tests are performed on the log odds ratio scale
    contrast(fname.height.emmeans.unadj, list("Unadj OR: Height4 (5'7-5'9) on FNAME_NP =Y relative to 5'10-6'" = c(-1, 0, 0, 0, 1, 0, 0)))
##  contrast                                                       odds.ratio
##  Unadj OR: Height4 (5'7-5'9) on FNAME_NP =Y relative to 5'10-6'       1.14
##     SE  df null z.ratio p.value
##  0.526 Inf    1   0.290  0.7715
## 
## Tests are performed on the log odds ratio scale
    contrast(fname.height.emmeans.unadj, list("Unadj OR: Height6 (6'-6'4) on FNAME_NP =Y relative to 5'10-6'" = c(-1, 0, 0, 0, 0, 1, 0)))
##  contrast                                                      odds.ratio    SE
##  Unadj OR: Height6 (6'-6'4) on FNAME_NP =Y relative to 5'10-6'      0.525 0.376
##   df null z.ratio p.value
##  Inf    1  -0.901  0.3678
## 
## Tests are performed on the log odds ratio scale
    contrast(fname.height.emmeans.unadj, list("Unadj OR: Height7 (< 6'4) on FNAME_NP =Y relative to 5'10-6'" = c(-1, 0, 0, 0, 0, 0, 1)))
##  contrast                                                     odds.ratio   SE
##  Unadj OR: Height7 (< 6'4) on FNAME_NP =Y relative to 5'10-6'        3.5 5.11
##   df null z.ratio p.value
##  Inf    1   0.859  0.3905
## 
## Tests are performed on the log odds ratio scale
    #Determine whether the multilevel categorical variable HEIGHT is significant overall in the UNADJUSTED model?
    aod::wald.test(b = coef(fname.height.glm.unadj), Sigma = vcov(fname.height.glm.unadj), Terms = 2:7)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 8.0, df = 6, P(> X2) = 0.24
    summary(fname.height.glm.adj)
## 
## Call:
## glm(formula = FNAME_NP ~ HEIGHT + SEX, family = binomial(link = "logit"), 
##     data = FNAME)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1774  -0.8764  -0.7094   1.3364   2.0184  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.25135    0.35869  -3.489 0.000485 ***
## HEIGHT1      1.31604    1.51989   0.866 0.386558    
## HEIGHT2      0.94980    0.63036   1.507 0.131871    
## HEIGHT3      0.55716    0.57511   0.969 0.332647    
## HEIGHT4      0.15675    0.48522   0.323 0.746660    
## HEIGHT6     -0.64577    0.71554  -0.902 0.366793    
## HEIGHT7      1.25135    1.45899   0.858 0.391069    
## SEX1        -0.06469    0.43536  -0.149 0.881882    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 275.21  on 231  degrees of freedom
## Residual deviance: 266.69  on 224  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 282.69
## 
## Number of Fisher Scoring iterations: 4
    #summary(fname.height.emmeans.adj)
    
    
    #Determine whether the multilevel categorical variable HEIGHT is significant overall in the ADJUSTED model?
    aod::wald.test(b = coef(fname.height.glm.adj), Sigma = vcov(fname.height.glm.adj), Terms = 2:7)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 5.3, df = 6, P(> X2) = 0.5
    # Just for fun, adjust only for SEX
     
    fname.sex.glm.unadj <- glm( FNAME_NP ~ SEX , data= FNAME, family= binomial(link="logit"))
    fname.sex.emmeans.unadj <- emmeans( fname.sex.glm.unadj,  ~ c(SEX), type= "response")
    
    
    summary(fname.sex.glm.unadj)
## 
## Call:
## glm(formula = FNAME_NP ~ SEX, family = binomial(link = "logit"), 
##     data = FNAME)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8956  -0.8956  -0.7255   1.4883   1.7109  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.2004     0.2156  -5.569 2.57e-08 ***
## SEX1          0.4938     0.2947   1.676   0.0938 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 275.86  on 232  degrees of freedom
## Residual deviance: 273.03  on 231  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 277.03
## 
## Number of Fisher Scoring iterations: 4
    #Confirm that Sex is not significant (trend with  0.10 > p-value > 0.05)
    aod::wald.test(b = coef(fname.sex.glm.unadj), Sigma = vcov(fname.sex.glm.unadj), Terms = 2:2)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 2.8, df = 1, P(> X2) = 0.094
    contrast(fname.sex.emmeans.unadj, list("Unadj OR: Females vs. Males FNAME_NP =Y" =   c(-1, 1)))
##  contrast                                odds.ratio    SE  df null z.ratio
##  Unadj OR: Females vs. Males FNAME_NP =Y       1.64 0.483 Inf    1   1.676
##  p.value
##   0.0938
## 
## Tests are performed on the log odds ratio scale
    #Use sjPlots to display resutls for all three models side-by-side: Height Only,  Height + Sex, Sex Only
    
    tab_model(fname.height.glm.unadj, fname.height.glm.adj, fname.sex.glm.unadj, title= "First Name - No Permission Logistic Models with Height Bands as Given<br>.",
              dv.labels= c('Height Cat Unadj','Height Cat Adj Sex', 'Sex Unadj'))
First Name - No Permission Logistic Models with Height Bands as Given
.
  Height Cat Unadj Height Cat Adj Sex Sex Unadj
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
(Intercept) 0.29 0.13 – 0.56 <0.001 0.29 0.13 – 0.56 <0.001 0.30 0.19 – 0.45 <0.001
HEIGHT [1] 3.50 0.13 – 94.01 0.391 3.73 0.13 – 108.81 0.387
HEIGHT [2] 2.43 0.95 – 6.46 0.066 2.59 0.76 – 9.06 0.132
HEIGHT [3] 1.66 0.68 – 4.19 0.270 1.75 0.57 – 5.48 0.333
HEIGHT [4] 1.14 0.47 – 2.89 0.772 1.17 0.45 – 3.09 0.747
HEIGHT [6] 0.53 0.11 – 1.96 0.368 0.52 0.11 – 1.95 0.367
HEIGHT [7] 3.50 0.13 – 94.01 0.391 3.50 0.13 – 93.88 0.391
SEX [1] 0.94 0.40 – 2.21 0.882 1.64 0.92 – 2.94 0.094
Observations 232 232 233
R2 Tjur 0.036 0.036 0.012
    #Run additional on the collapsed Height Cat variable and see what happens
    
        
    fname.htcat.glm.unadj <- glm( FNAME_NP ~ HTCAT   , data= FNAME, family= binomial(link="logit"))
    fname.htcat.emmeans.unadj <- emmeans( fname.htcat.glm.unadj,  ~ c(HTCAT), type= "response")
    
    pairs(fname.htcat.emmeans.unadj)
##  contrast        odds.ratio   SE  df null z.ratio p.value
##  HTCAT1 / HTCAT0      0.506 0.15 Inf    1  -2.301  0.0214
## 
## Tests are performed on the log odds ratio scale
    contrast(fname.htcat.emmeans.unadj, list("Unadj OR: Height < 5'7 on FNAME_NP =Y relative to 5'7+ " =   c(-1, 1)))
##  contrast                                                odds.ratio    SE  df
##  Unadj OR: Height < 5'7 on FNAME_NP =Y relative to 5'7+        1.98 0.585 Inf
##  null z.ratio p.value
##     1   2.301  0.0214
## 
## Tests are performed on the log odds ratio scale
    fname.htcat.glm.adj <- glm( FNAME_NP ~ HTCAT+ TRNG_LEVEL , data= FNAME, family= binomial(link="logit"))
    fname.htcat.emmeans.adj <- emmeans( fname.htcat.glm.adj,  ~ c(HTCAT), type= "response")
    
    fname.htcat.glm.adj2 <- glm( FNAME_NP ~ HTCAT+ RACE , data= FNAME, family= binomial(link="logit"))
    fname.htcat.emmeans.adj2 <- emmeans
    
    fname.htcat.glm.adj3 <- glm( FNAME_NP ~ HTCAT+ SEX , data= FNAME, family= binomial(link="logit"))
    fname.htcat.emmeans.adj3 <- emmeans( fname.htcat.glm.adj,  ~ c(HTCAT), type= "response")
    
    summary(fname.htcat.glm.adj3)
## 
## Call:
## glm(formula = FNAME_NP ~ HTCAT + SEX, family = binomial(link = "logit"), 
##     data = FNAME)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9504  -0.9160  -0.7034   1.4229   1.7424  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.27059    0.22234  -5.715  1.1e-08 ***
## HTCAT0       0.61917    0.40363   1.534    0.125    
## SEX1         0.09074    0.40441   0.224    0.822    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 275.21  on 231  degrees of freedom
## Residual deviance: 269.83  on 229  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 275.83
## 
## Number of Fisher Scoring iterations: 4
    tab_model(fname.htcat.glm.unadj, fname.htcat.glm.adj, fname.htcat.glm.adj2, fname.htcat.glm.adj3,
              title= "First Name - No Permission Logistic Models with Height Bands Collapsed (2) <br>.",
              dv.labels= c('Height Cat Unadj','Height Cat Adj Training Level', 'Height Cat Adj Race', 'Height Cat Adj Sex'))
First Name - No Permission Logistic Models with Height Bands Collapsed (2)
.
  Height Cat Unadj Height Cat Adj Training Level Height Cat Adj Race Height Cat Adj Sex
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
(Intercept) 0.29 0.19 – 0.42 <0.001 0.22 0.12 – 0.39 <0.001 0.29 0.17 – 0.47 <0.001 0.28 0.18 – 0.43 <0.001
HTCAT [0] 1.98 1.11 – 3.55 0.021 1.95 1.08 – 3.55 0.027 1.99 1.10 – 3.61 0.023 1.86 0.85 – 4.17 0.125
TRNG LEVEL [1] 1.39 0.67 – 2.93 0.382
TRNG LEVEL [2] 1.41 0.67 – 2.98 0.367
RACE [1] 0.98 0.54 – 1.78 0.937
SEX [1] 1.09 0.49 – 2.40 0.822
Observations 232 231 232 232
R2 Tjur 0.023 0.029 0.023 0.023


  • Conclusion: When compared to the median Male height (HEIGHT=5, or 5’10-6’), the odds of the subjects’ first name being used without permission decreases as the reference height is approached. Subjects who were taller, however, had mixed results: Ones who were one band above the reference height (6’-6’4), had reduced odds of being disrespected in this manner; but the tallest subjects (HEIGHT=7, or > 6’4) had the worst odds ratio of all, 3.5, a group that is singularly comprised of males (n=2). Unfortunately, none of these results was statistically significant.

  • Even after adjusting for SEX, the significance of HEIGHT in the model did not improve (in fact, it worsened, likely because the variables HEIGHT and SEX are highly correlated). An unadjusted model where SEX replaced HEIGHT as the predictor variable, yielded good results, but still not significant (p= 0.0938).

  • If the Height Category is collapsed into two groups – shorter than 5’7” and those who are 5’7” and above, there does appear to be some significance – even if controlled separately for Training Level and Race. Only controlling for Sex knocks this predictor out of sigificance, likely because the HEIGHT and SEX are so highly correlated.





PLAYGROUND (FOR TESTING IDEAS)


Question 1: Is it useful to know a “Pain Score”?

PAIN_SCORE variable is created by turing six questions into numeric variables with binary values:
  • Sustained endoscopy-related injury (Yes = 1, No/NA = 0)
  • Transient pain experienced in hand (Yes = 1, No/NA = 0)
  • Transient pain experience in neck/shoulders (Yes = 1, No/NA = 0)
  • Transient pain experienced in back (Yes = 1, No/NA = 0)
  • Transient pain experienced in legs (Yes = 1, No/NA = 0)
  • Transient pain experienced in feet (Yes = 1, No/NA = 0)


PAIN <-

SURVEY %>% 
    mutate( BIRTHSEX2 = case_when( BIRTHSEX == "M" ~ 0,
                                      TRUE ~ 1 ),
      
            PAIN_INJ = case_when (EVER_INJURED == 'Y' ~ 1,
                                  TRUE ~ 0 ),
            
            PAIN_HAND = case_when (EXPERIENCED_TRANSIENT_PAIN_HAND == "Y" ~ 1,
                                   TRUE ~ 0),

            PAIN_NECK = case_when (EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER == "Y" ~ 1,
                                   TRUE ~ 0),

            PAIN_BACK = case_when (EXPERIENCED_TRANSIENT_PAIN_BACK == "Y" ~ 1,
                                   TRUE ~ 0),

            PAIN_LEG = case_when (EXPERIENCED_TRANSIENT_PAIN_LEG == "Y" ~ 1,
                                   TRUE ~ 0),

            PAIN_FOOT = case_when (EXPERIENCED_TRANSIENT_PAIN_FOOT == "Y" ~ 1,
                                   TRUE ~ 0)) %>% 
            rowwise() %>% 
            mutate(PAIN_SCORE = sum(PAIN_INJ+ PAIN_HAND+ PAIN_NECK+ PAIN_BACK + PAIN_LEG+ PAIN_FOOT)) %>% 
  
    select( RECORD_ID, BIRTHSEX, BIRTHSEX2, RACE, RACE2, TRAINING_LEVEL, EVER_INJURED, starts_with("PAIN_") )



# Test for normality of distribution (will be sensitive to outliers)
shapiro.test(PAIN$PAIN_SCORE)
## 
##  Shapiro-Wilk normality test
## 
## data:  PAIN$PAIN_SCORE
## W = 0.89272, p-value = 6.5e-12
# Group Means & Standard Deviations
PAIN %>% 
  group_by(BIRTHSEX) %>% 
  summarize( PAINMEAN = mean(PAIN_SCORE),
             PAINSD = sd(PAIN_SCORE))
## # A tibble: 2 × 3
##   BIRTHSEX PAINMEAN PAINSD
##   <fct>       <dbl>  <dbl>
## 1 F            2.27   1.39
## 2 M            1.76   1.33
# Welch's T-test

eov.ttest(PAIN, PAIN_SCORE, BIRTHSEX)
## [1] "F Test p.value = 0.6449448  EOV = TRUE (Pooled)"
## [1] "PAIN : PAIN_SCORE ~ BIRTHSEX"
## 
##  Two Sample t-test
## 
## data:  PAIN : PAIN_SCORE ~ BIRTHSEX
## t = 2.8305, df = 234, p-value = 0.005051
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  0.1523617 0.8501565
## sample estimates:
## mean in group F mean in group M 
##        2.265487        1.764228
# Violin Plot to explore differences between sexes
ggbetweenstats( data= PAIN,
                x = BIRTHSEX,
                y = PAIN_SCORE,
                type="parametric",
                p.adjust.method = "none",
                title = "Mean Pain/Injury Scores by Birth Sex")

PAIN %>%
ggplot(aes( x= PAIN_SCORE, y= stat(density), fill= BIRTHSEX))+
geom_density( alpha=0.5, position = "identity" ) +
scale_x_continuous(breaks = scales::breaks_width(1.0) ) +
scale_y_continuous(breaks = scales::breaks_width(.1))+
scale_fill_manual(values = c("red","darkgreen"), name ="Birth Sex", labels = c("Female","Male"))+
xlab("Pain Score")+ ylab("Density")+
ggtitle("Pain Score Density Curve by Sex") +
theme_538()

corr.test(  PAIN$BIRTHSEX2,PAIN$PAIN_SCORE, method= 'pearson')
## Call:corr.test(x = PAIN$BIRTHSEX2, y = PAIN$PAIN_SCORE, method = "pearson")
## Correlation matrix 
## [1] 0.18
## Sample Size 
## [1] 236
## These are the unadjusted probability values.
##   The probability values  adjusted for multiple tests are in the p.adj object. 
## [1] 0.01
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option


Question 2: Is it useful to know a “Training Score”?

TRNG_SCORE variable is created by turing the questions into numeric variables with di- or polychotomous values:
  • Formal Ergo Training (Yes=1, No=0)
  • Informal Ergo Training, (Yes=1, No=0)
  • Posture Awareness (Yes=1, No=0)
  • Bed Height Adjustment (Yes=1, No=0)
  • Bed Angle Position (Yes=1, No=0)
  • Monitor Height Adjustment (Yes=1, No=0)
  • Special Maneuvers/Injury Avoidance (Yes=1, No=0)
  • Strength-Building Exercises (Yes=1, No=0)
  • Use of Dial Extenders (Yes=1, default=0)
  • Use of Pedi Colonoscopes (Yes=1, No=0)
  • Ergo Feedback from Teachers (Often=1, Sometimes=1, Rarely=0, Never=0)
  • Equipment optimizatIon (Yes=1, default=0)
  • Glove Size Available (Yes=1, default=0)
  • Dial Extenders Available (Yes=1, default=0)
  • Encouraged to Use Dial Ext (Yes=1, default=0)
  • Pediatric Colonoscopes Available (Yes=1, default=0)
  • Lightweight 1P Aprons Available (Yes=1, default=0)
  • Lightweight 2P Aprons Available (Yes=1, default=0)
  • Well-fitted Aprons Available (Yes=1, default=0)
  • Formal Ergo Time-Out (Yes=1, No=0)
  • Informal Ergo Time-Out (Yes=1, No=0)
  • Monitors Easily Adjustable (Yes=1, No=0)
  • Teachers Sensitivity During Training (Yes=1, No=0)
  • Received Tactile Instruction from Males (Often=1, default=0)
  • Received Tactile Instruction from Females (Often=1, default=0)


# CREATE TRAINING INDEX 

TRAINING <-
  
  SURVEY %>% 
  mutate( BIRTHSEX2 = case_when( is.na(BIRTHSEX) ~ NA_real_,
                                 BIRTHSEX == "M" ~ 0,
                                 TRUE ~ 1 ),
          
         # AGE2 = factor(AGE2, levels=rev(levels(AGE2))),
          
          AGENUM  = case_when (AGE2 == "< 30" ~ 1,
                               AGE2 == "30-34" ~2,
                               AGE2 == "35-40" ~3,
                               TRUE ~ 4),
         
         
          TRNG_FORMAL = case_when (is.na(FELLOWSHIP_FORMAL_ERGO_TRAINING) ~ NA_real_,
                                   FELLOWSHIP_FORMAL_ERGO_TRAINING == 'Y' ~ 1,
                                TRUE ~ 0 ),
          
          TRNG_INFORMAL = case_when (is.na(INFORMAL_TRAINING) ~ NA_real_,
                                     INFORMAL_TRAINING == "Y" ~ 1,
                                 TRUE ~ 0),
          
          TRNG_POSTURAL = case_when (is.na(TRAINING_TECHNIQUES_POSTURAL) ~ NA_real_,
                                    TRAINING_TECHNIQUES_POSTURAL == "Y" ~ 1,
                                 TRUE ~ 0),
          
          TRNG_BEDHT = case_when (is.na(TRAINING_TECHNIQUES_BEDHEIGHT) ~ NA_real_,
                                  TRAINING_TECHNIQUES_BEDHEIGHT == "Y" ~ 1,
                                 TRUE ~ 0),
          
          TRNG_BEDANG = case_when (is.na(TRAINING_TECHNIQUES_BEDANGLE) ~ NA_real_,
                                   TRAINING_TECHNIQUES_BEDANGLE == "Y" ~ 1,
                                TRUE ~ 0),
          
          TRNG_MONITOR = case_when (is.na(TRAINING_TECHNIQUES_MONITORHEIGHT) ~ NA_real_,
                                    TRAINING_TECHNIQUES_MONITORHEIGHT == "Y" ~ 1,
                                 TRUE ~ 0),
          
          TRNG_MANEUVERS = case_when (is.na(TRAINING_TECHNIQUES_MUSCULOSKELETAL) ~ NA_real_,
                                      TRAINING_TECHNIQUES_MUSCULOSKELETAL == "Y" ~ 1,
                                    TRUE ~ 0) ,

          TRNG_EXERCISE = case_when (is.na(TRAINING_TECHNIQUES_EXERCISE_STRETCHING) ~ NA_real_,
                                     TRAINING_TECHNIQUES_EXERCISE_STRETCHING == "Y" ~ 1,
                                    TRUE ~ 0),
          
          TRNG_DIALEXT= case_when (is.na(TRAINING_TECHNIQUES_DIAL_EXTENDERS) ~ NA_real_,
                                   TRAINING_TECHNIQUES_DIAL_EXTENDERS == "Y" ~ 1,
                                   TRUE ~ 0),
          
          TRNG_PEDISCOPE= case_when (is.na(TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE) ~ NA_real_,
                                     TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE == "Y" ~ 1,
                                   TRUE ~ 0),
          
          TRNG_FEEDBACK= case_when (is.na(ERGO_FEEDBACK) ~ NA_real_,
                                    ERGO_FEEDBACK == "Often" ~ 1,
                                    ERGO_FEEDBACK == "Sometimes" ~ 1,
                                    TRUE ~ 0),
          
          TRNG_OPTIM= case_when (is.na(ERGO_OPTIMIZATION) ~ NA_real_,
                                 ERGO_OPTIMIZATION == "Y" ~ 1,
                                 ERGO_OPTIMIZATION == "N" ~ 0,
                                TRUE ~ 0),
          
          TRNG_GLOVE_AVAIL = case_when (is.na(GLOVE_SIZE_AVAILABLE) ~ NA_real_,
                                        GLOVE_SIZE_AVAILABLE == "Y" ~ 1,
                                 TRUE ~ 0),
          
          TRNG_DIALEXT_AVAIL= case_when (is.na(DIAL_EXTENDERS_AVAILABLE) ~ NA_real_,
                                         DIAL_EXTENDERS_AVAILABLE == "Y" ~ 1,
                                        TRUE ~ 0),
          
          TRNG_DIALEXT_ENC= case_when (is.na(DIAL_EXTENDERS_ENCOURAGED) ~ NA_real_,
                                       DIAL_EXTENDERS_ENCOURAGED == "Y" ~ 1,
                                                                    TRUE ~ 0),
          
          TRNG_PEDISCOPE_AVAIL = case_when (is.na(PEDI_COLONOSCOPES_AVAILABLE) ~ NA_real_,
                                            PEDI_COLONOSCOPES_AVAILABLE == "Y" ~ 1,
                                        TRUE ~ 0),
          
          TRNG_APRON_LW1P = case_when (is.na(LEAD_APRONS_LW_ONEPIECE) ~ NA_real_,
                                               LEAD_APRONS_LW_ONEPIECE == "Y" ~ 1,
                                            TRUE ~ 0),
          
          TRNG_APRON_LW2P = case_when (is.na(LEAD_APRONS_LW_TWOPIECE) ~ NA_real_,
                                       LEAD_APRONS_LW_TWOPIECE == "Y" ~ 1,
                                       TRUE ~ 0),

          TRNG_TO_FORMAL= case_when (is.na(ERGO_FORMAL_TIMEOUT_PRIOR) ~ NA_real_,
                                     ERGO_FORMAL_TIMEOUT_PRIOR == "Y" ~ 1,
                                            TRUE ~ 0),
          
          TRNG_TO_INFORMAL= case_when (is.na(ERGO_INFORMAL_TIMEOUT_PRIOR) ~ NA_real_,
                                       ERGO_INFORMAL_TIMEOUT_PRIOR == "Y" ~ 1,
                                     TRUE ~ 0),

          TRNG_MONITORS_EASYADJ = case_when (is.na(MONITORS_ADJUSTABLE) ~ NA_real_,
                                             MONITORS_ADJUSTABLE == "Y" ~ 1,
                                     TRUE ~ 0),
          
          TRNG_TRAINERS_SENSITIVITY = case_when (is.na(TEACHER_SENSITIVITY_STATURE_HANDSIZE) ~ NA_real_,
                                                 TEACHER_SENSITIVITY_STATURE_HANDSIZE == "Y" ~ 1,
                                                  TRUE ~ 0),
          
          
          TRNG_TACTILE_MALES = case_when (is.na(TACTILE_INSTRUCTION_FROM_MALES) ~ NA_real_,
                                          TACTILE_INSTRUCTION_FROM_MALES == "Often" ~ 1,
                                                 TRUE ~ 0),
          
          TRNG_TACTILE_FEMALES = case_when (is.na(TACTILE_INSTRUCTION_FROM_FEMALES) ~ NA_real_,
                                            TACTILE_INSTRUCTION_FROM_FEMALES == "Often" ~ 1,
                                          TRUE ~ 0))  %>% 
  
  rowwise() %>% 
  mutate(TRNG_SCORE = sum(TRNG_FORMAL+ TRNG_INFORMAL+ TRNG_POSTURAL+ TRNG_BEDHT+ +TRNG_BEDANG+ TRNG_MONITOR+ TRNG_MANEUVERS+ TRNG_EXERCISE+ TRNG_DIALEXT+
                          TRNG_PEDISCOPE+ TRNG_FEEDBACK+ TRNG_OPTIM+ TRNG_GLOVE_AVAIL+ TRNG_DIALEXT_AVAIL+ TRNG_DIALEXT_ENC+ TRNG_PEDISCOPE_AVAIL+
                            TRNG_APRON_LW1P+ TRNG_APRON_LW2P+ TRNG_TO_FORMAL+ TRNG_TO_INFORMAL+ TRNG_MONITORS_EASYADJ + TRNG_TRAINERS_SENSITIVITY+
                          TRNG_TACTILE_MALES+ TRNG_TACTILE_FEMALES),
                          
          TRNG_SCORE2 = sum(TRNG_FORMAL+ TRNG_INFORMAL+ TRNG_POSTURAL+ TRNG_BEDHT+ +TRNG_BEDANG+ TRNG_MONITOR+ TRNG_MANEUVERS+ TRNG_EXERCISE+ TRNG_DIALEXT+
                              TRNG_PEDISCOPE+ TRNG_FEEDBACK+ TRNG_TO_FORMAL+ TRNG_TO_INFORMAL+  TRNG_TRAINERS_SENSITIVITY+
                              TRNG_TACTILE_MALES+ TRNG_TACTILE_FEMALES) ,
         
          EQUIP_SCORE = sum(TRNG_OPTIM+ TRNG_GLOVE_AVAIL+ TRNG_DIALEXT_AVAIL+ TRNG_DIALEXT_ENC+ TRNG_PEDISCOPE_AVAIL+
                              TRNG_APRON_LW1P+ TRNG_APRON_LW2P + TRNG_TRAINERS_SENSITIVITY+ TRNG_MONITORS_EASYADJ
                              )) %>% 
  
  select( RECORD_ID, BIRTHSEX, BIRTHSEX2, AGE2, AGENUM, RACE, RACE2, TRAINING_LEVEL, starts_with("TRNG_"), starts_with("EQUIP_") )







TRAINING %>% 
  group_by(BIRTHSEX) %>% 
  summarize( TRNGMEAN = mean(TRNG_SCORE, na.rm=T),
             TRNGSD = sd(TRNG_SCORE, na.rm=T))
## # A tibble: 2 × 3
##   BIRTHSEX TRNGMEAN TRNGSD
##   <fct>       <dbl>  <dbl>
## 1 F            11.9   3.71
## 2 M            12.3   3.36
shapiro.test(TRAINING$TRNG_SCORE)
## 
##  Shapiro-Wilk normality test
## 
## data:  TRAINING$TRNG_SCORE
## W = 0.9831, p-value = 0.007635
eov.ttest(TRAINING, TRNG_SCORE, BIRTHSEX)
## [1] "F Test p.value = 0.29308  EOV = TRUE (Pooled)"
## [1] "TRAINING : TRNG_SCORE ~ BIRTHSEX"
## 
##  Two Sample t-test
## 
## data:  TRAINING : TRNG_SCORE ~ BIRTHSEX
## t = -0.79713, df = 228, p-value = 0.4262
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -1.2890501  0.5464879
## sample estimates:
## mean in group F mean in group M 
##        11.90741        12.27869
ggbetweenstats( data= TRAINING,
                x = BIRTHSEX,
                y = TRNG_SCORE,
                type="parametric",
                p.adjust.method = "none",
                title = "Mean Training Scores by Birth Sex")

corr.test(  TRAINING$BIRTHSEX2,TRAINING$TRNG_SCORE, method= 'pearson', ci=T)
## Call:corr.test(x = TRAINING$BIRTHSEX2, y = TRAINING$TRNG_SCORE, method = "pearson", 
##     ci = T)
## Correlation matrix 
## [1] -0.05
## Sample Size 
## [1] 230
## These are the unadjusted probability values.
##   The probability values  adjusted for multiple tests are in the p.adj object. 
## [1] 0.43
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option


Simple Regressions to Predict Pain Score (Likelihood to Sustain Pain/Injury)


# BIRTHSEX predicting PAIN_SCORE

summary(lm(PAIN_SCORE ~ BIRTHSEX , data = SCORES))
## 
## Call:
## lm(formula = PAIN_SCORE ~ BIRTHSEX, data = SCORES)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2655 -0.7642 -0.2655  0.7345  4.2358 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.2655     0.1278  17.720  < 2e-16 ***
## BIRTHSEXM    -0.5013     0.1771  -2.831  0.00505 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.359 on 234 degrees of freedom
## Multiple R-squared:  0.0331, Adjusted R-squared:  0.02897 
## F-statistic: 8.012 on 1 and 234 DF,  p-value: 0.005051
# BIRTHSEX predicting PAIN_SCORE when controlled for EQUIP_SCORE

summary(lm(PAIN_SCORE ~  BIRTHSEX + EQUIP_SCORE , data = SCORES))
## 
## Call:
## lm(formula = PAIN_SCORE ~ BIRTHSEX + EQUIP_SCORE, data = SCORES)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4855 -0.9155 -0.3722  0.6232  4.0845 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.70281    0.25705  10.515   <2e-16 ***
## BIRTHSEXM   -0.46136    0.18222  -2.532   0.0120 *  
## EQUIP_SCORE -0.10866    0.05857  -1.855   0.0649 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.358 on 229 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.05059,    Adjusted R-squared:  0.0423 
## F-statistic: 6.102 on 2 and 229 DF,  p-value: 0.00262
# BIRTHSEX predicting PAIN_SCORE when controlled for EQUIP_SCORE & AGEBAND


summary(lm(PAIN_SCORE ~  BIRTHSEX + EQUIP_SCORE + AGE2 , data = SCORES))
## 
## Call:
## lm(formula = PAIN_SCORE ~ BIRTHSEX + EQUIP_SCORE + AGE2, data = SCORES)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5805 -0.9306 -0.2907  0.6265  4.1546 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.16251    0.39371   5.493 1.06e-07 ***
## BIRTHSEXM   -0.41806    0.18322  -2.282   0.0234 *  
## EQUIP_SCORE -0.11591    0.05824  -1.990   0.0478 *  
## AGE230-34    0.64978    0.34264   1.896   0.0592 .  
## AGE235-40    0.33279    0.40318   0.825   0.4100    
## AGE2> 40    -1.28081    1.39037  -0.921   0.3579    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.348 on 226 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.07705,    Adjusted R-squared:  0.05663 
## F-statistic: 3.774 on 5 and 226 DF,  p-value: 0.00266
anova(lm(PAIN_SCORE ~ BIRTHSEX + EQUIP_SCORE + AGE2 , data = SCORES))
## Analysis of Variance Table
## 
## Response: PAIN_SCORE
##              Df Sum Sq Mean Sq F value   Pr(>F)   
## BIRTHSEX      1  16.16 16.1637  8.8951 0.003173 **
## EQUIP_SCORE   1   6.35  6.3481  3.4934 0.062907 . 
## AGE2          3  11.77  3.9245  2.1597 0.093607 . 
## Residuals   226 410.68  1.8172                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# COMPARE how M/F PAIN_SCORE means differ in unadjusted and fully adjusted model


library(emmeans)

#unadjusted model
pscore.unadj <- emmeans(lm(PAIN_SCORE ~ BIRTHSEX , data = SCORES), specs=  "BIRTHSEX")
summary(pscore.unadj)
##  BIRTHSEX emmean    SE  df lower.CL upper.CL
##  F          2.27 0.128 234     2.01     2.52
##  M          1.76 0.123 234     1.52     2.01
## 
## Confidence level used: 0.95
contrast(pscore.unadj, list(FvM = c(1,-1)))
##  contrast estimate    SE  df t.ratio p.value
##  FvM         0.501 0.177 234   2.831  0.0051
#fully adjusted model
pscore.adj <- emmeans(lm(PAIN_SCORE ~ BIRTHSEX + EQUIP_SCORE + AGE2 , data = SCORES), specs=  "BIRTHSEX")
summary(pscore.adj)
##  BIRTHSEX emmean    SE  df lower.CL upper.CL
##  F          1.61 0.373 226    0.875     2.35
##  M          1.19 0.358 226    0.488     1.90
## 
## Results are averaged over the levels of: AGE2 
## Confidence level used: 0.95
contrast(pscore.adj, list(FvM = c(1,-1)))
##  contrast estimate    SE  df t.ratio p.value
##  FvM         0.418 0.183 226   2.282  0.0234
## 
## Results are averaged over the levels of: AGE2
#Compare all combinations of BIRTHSEX-AGE2 to see if any sliver is significantly different (after Benjamini-Hochberg correction for multiple comparisons)

pscore.adj2 <- emmeans(lm(PAIN_SCORE ~ BIRTHSEX + EQUIP_SCORE + AGE2 , data = SCORES), specs=  c("BIRTHSEX","AGE2"))
summary(pscore.adj2)
##  BIRTHSEX AGE2   emmean    SE  df lower.CL upper.CL
##  F        < 30   1.6854 0.340 226    1.016     2.35
##  M        < 30   1.2673 0.341 226    0.596     1.94
##  F        30-34  2.3352 0.135 226    2.069     2.60
##  M        30-34  1.9171 0.137 226    1.648     2.19
##  F        35-40  2.0182 0.264 226    1.498     2.54
##  M        35-40  1.6001 0.238 226    1.131     2.07
##  F        > 40   0.4046 1.361 226   -2.277     3.09
##  M        > 40  -0.0135 1.348 226   -2.670     2.64
## 
## Confidence level used: 0.95
pairs(pscore.adj2, adjust="bh", type="response")
##  contrast              estimate    SE  df t.ratio p.value
##  F < 30 - M < 30         0.4181 0.183 226   2.282  0.1093
##  F < 30 - (F 30-34)     -0.6498 0.343 226  -1.896  0.2071
##  F < 30 - (M 30-34)     -0.2317 0.388 226  -0.597  0.5938
##  F < 30 - (F 35-40)     -0.3328 0.403 226  -0.825  0.4783
##  F < 30 - (M 35-40)      0.0853 0.427 226   0.200  0.8419
##  F < 30 - F > 40         1.2808 1.390 226   0.921  0.4772
##  F < 30 - M > 40         1.6989 1.390 226   1.222  0.3730
##  M < 30 - (F 30-34)     -1.0678 0.389 226  -2.747  0.1093
##  M < 30 - (M 30-34)     -0.6498 0.343 226  -1.896  0.2071
##  M < 30 - (F 35-40)     -0.7509 0.458 226  -1.639  0.2872
##  M < 30 - (M 35-40)     -0.3328 0.403 226  -0.825  0.4783
##  M < 30 - F > 40         0.8627 1.415 226   0.610  0.5938
##  M < 30 - M > 40         1.2808 1.390 226   0.921  0.4772
##  (F 30-34) - (M 30-34)   0.4181 0.183 226   2.282  0.1093
##  (F 30-34) - (F 35-40)   0.3170 0.255 226   1.243  0.3730
##  (F 30-34) - (M 35-40)   0.7351 0.291 226   2.522  0.1093
##  (F 30-34) - F > 40      1.9306 1.355 226   1.425  0.3352
##  (F 30-34) - M > 40      2.3487 1.355 226   1.734  0.2624
##  (M 30-34) - (F 35-40)  -0.1011 0.335 226  -0.302  0.7913
##  (M 30-34) - (M 35-40)   0.3170 0.255 226   1.243  0.3730
##  (M 30-34) - F > 40      1.5125 1.380 226   1.096  0.4041
##  (M 30-34) - M > 40      1.9306 1.355 226   1.425  0.3352
##  (F 35-40) - (M 35-40)   0.4181 0.183 226   2.282  0.1093
##  (F 35-40) - F > 40      1.6136 1.369 226   1.179  0.3730
##  (F 35-40) - M > 40      2.0317 1.374 226   1.479  0.3352
##  (M 35-40) - F > 40      1.1955 1.389 226   0.861  0.4783
##  (M 35-40) - M > 40      1.6136 1.369 226   1.179  0.3730
##  F > 40 - M > 40         0.4181 0.183 226   2.282  0.1093
## 
## P value adjustment: BH method for 28 tests
#FINDING:  
#In the fully-adjusted model, of the 28 possible SEX/AGEBAND combinations, the one that is statistically significant - and survives a correction for multiple comparisons - is "Males < 30" versus "Females 30-34." Here the Females' mean is 1.327 points higher than the Males', p=0.0292.  (The F-M differential for entire adjusted model was only 0.388 points.)


#Since Females 30-34 have the highest adjusted Pain Score, how does that score compare to the mean Pain Score of ALL OTHER RESPONDENTS?


# Mean for Females 30-34
contrast( pscore.adj2, list(F3034 = c(0,0,1,0,0,0,0,0)))
##  contrast estimate    SE  df t.ratio p.value
##  F3034        2.34 0.135 226  17.267  <.0001
#Mean for Everyone Else
contrast( pscore.adj2, list(OTHERS = c(1/7, 1/7, 0, 1/7, 1/7, 1/7, 1/7, 1/7)))
##  contrast estimate    SE  df t.ratio p.value
##  OTHERS       1.27 0.403 226   3.150  0.0019
#Contast Females 30-34 relative to all other respondents
contrast( pscore.adj2, list("Females 30-34 rel to Others" = c(-1/7, -1/7, 1, -1/7, -1/7, -1/7, -1/7, -1/7)))
##  contrast                    estimate    SE  df t.ratio p.value
##  Females 30-34 rel to Others     1.07 0.416 226   2.562  0.0111
# EQUIP_SCORE predicting PAIN_SCORE when controlled for BIRTHSEX & TRAINING_LEVEL & AGEBAND
# Should Equipment Score be the "outcome of interest"? (In other words, EQUIP_SCORE observed after controlling for Sex and Training Level ... or Age?) 

summary(lm(PAIN_SCORE ~ EQUIP_SCORE + BIRTHSEX + TRAINING_LEVEL  , data = SCORES))
## 
## Call:
## lm(formula = PAIN_SCORE ~ EQUIP_SCORE + BIRTHSEX + TRAINING_LEVEL, 
##     data = SCORES)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6999 -0.9068 -0.2472  0.5709  3.9229 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                2.76863    0.29237   9.469   <2e-16 ***
## EQUIP_SCORE               -0.11318    0.05842  -1.937   0.0540 .  
## BIRTHSEXM                 -0.46520    0.18420  -2.525   0.0122 *  
## TRAINING_LEVELFirst Year  -0.28440    0.21977  -1.294   0.1969    
## TRAINING_LEVELSecond Year  0.15766    0.21943   0.719   0.4732    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.354 on 226 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.06702,    Adjusted R-squared:  0.05051 
## F-statistic: 4.059 on 4 and 226 DF,  p-value: 0.003377
anova(lm(PAIN_SCORE   ~ EQUIP_SCORE + BIRTHSEX + TRAINING_LEVEL , data = SCORES))
## Analysis of Variance Table
## 
## Response: PAIN_SCORE
##                 Df Sum Sq Mean Sq F value  Pr(>F)  
## EQUIP_SCORE      1  10.72 10.7194  5.8492 0.01638 *
## BIRTHSEX         1  11.42 11.4184  6.2306 0.01327 *
## TRAINING_LEVEL   2   7.62  3.8082  2.0780 0.12757  
## Residuals      226 414.18  1.8326                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#LOGISTIC MODEL - Does higher EQUIP_SCORE reduce odds of experiencing PAIN/INJURY

PAIN2 <-
          sqldf(" select a.*,
                        b.EQUIP_SCORE,
                        b.TRNG_SCORE,
                        case when b.PAIN_SCORE = 0 then 0 else 1 end as EVER_INJ_PAIN
                 from PAIN as a left join SCORES as b on (a.RECORD_ID = b.RECORD_ID) ")

  
  everinj.equip.glm.unadj <- glm( EVER_INJ_PAIN ~ EQUIP_SCORE  , data= PAIN2 , family= binomial(link="logit"))
  everinj.equip.emmeans.unadj <- emmeans( everinj.equip.glm.unadj,  c(~EQUIP_SCORE), at = list(EQUIP_SCORE = c(1:7)), type= "response", adjust="none")
  
  
  
  everinj.equip.glm.adj <- glm( EVER_INJ_PAIN ~ EQUIP_SCORE + BIRTHSEX  , data= PAIN2 , family= binomial(link="logit"))
  everinj.equip.emmeans.adj <- emmeans( everinj.equip.glm.adj,  ~c(EQUIP_SCORE, BIRTHSEX), at = list(EQUIP_SCORE = c(1:7)), type= "response", adjust="none")
  
  everinj.trng.glm.unadj <- glm( EVER_INJ_PAIN ~ TRNG_SCORE  , data= PAIN2 , family= binomial(link="logit"))
  everinj.trng.emmeans.unadj <- emmeans( everinj.trng.glm.unadj,  c(~TRNG_SCORE), at = list(TRNG_SCORE = c(5:24)), type= "response", adjust="none")
  
  
  
  summary(everinj.equip.glm.unadj)
## 
## Call:
## glm(formula = EVER_INJ_PAIN ~ EQUIP_SCORE, family = binomial(link = "logit"), 
##     data = PAIN2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4836   0.3578   0.4175   0.4862   0.8639  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.6786     0.6947   5.295 1.19e-07 ***
## EQUIP_SCORE  -0.3206     0.1380  -2.323   0.0202 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 145.50  on 231  degrees of freedom
## Residual deviance: 140.15  on 230  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 144.15
## 
## Number of Fisher Scoring iterations: 5
  summary(everinj.equip.emmeans.unadj)
##  EQUIP_SCORE  prob     SE  df asymp.LCL asymp.UCL
##            1 0.966 0.0184 Inf     0.905     0.989
##            2 0.954 0.0193 Inf     0.897     0.980
##            3 0.938 0.0193 Inf     0.888     0.967
##            4 0.917 0.0191 Inf     0.871     0.947
##            5 0.889 0.0227 Inf     0.836     0.926
##            6 0.853 0.0358 Inf     0.768     0.910
##            7 0.808 0.0597 Inf     0.664     0.899
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
  summary(everinj.equip.glm.adj)
## 
## Call:
## glm(formula = EVER_INJ_PAIN ~ EQUIP_SCORE + BIRTHSEX, family = binomial(link = "logit"), 
##     data = PAIN2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5356   0.3319   0.3862   0.4630   0.8822  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.7788     0.7177   5.265  1.4e-07 ***
## EQUIP_SCORE  -0.3025     0.1408  -2.149   0.0317 *  
## BIRTHSEXM    -0.3129     0.4771  -0.656   0.5119    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 145.50  on 231  degrees of freedom
## Residual deviance: 139.71  on 229  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 145.71
## 
## Number of Fisher Scoring iterations: 5
  summary(everinj.equip.emmeans.adj)
##  EQUIP_SCORE BIRTHSEX  prob     SE  df asymp.LCL asymp.UCL
##            1 F        0.970 0.0175 Inf     0.909     0.991
##            2 F        0.960 0.0192 Inf     0.900     0.984
##            3 F        0.946 0.0212 Inf     0.886     0.976
##            4 F        0.929 0.0247 Inf     0.862     0.964
##            5 F        0.906 0.0324 Inf     0.821     0.953
##            6 F        0.877 0.0470 Inf     0.752     0.944
##            7 F        0.840 0.0703 Inf     0.653     0.936
##            1 M        0.959 0.0246 Inf     0.873     0.988
##            2 M        0.946 0.0261 Inf     0.865     0.979
##            3 M        0.928 0.0268 Inf     0.855     0.966
##            4 M        0.905 0.0275 Inf     0.836     0.947
##            5 M        0.876 0.0313 Inf     0.800     0.925
##            6 M        0.839 0.0434 Inf     0.735     0.907
##            7 M        0.794 0.0661 Inf     0.636     0.895
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
  summary(everinj.trng.glm.unadj)
## 
## Call:
## glm(formula = EVER_INJ_PAIN ~ TRNG_SCORE, family = binomial(link = "logit"), 
##     data = PAIN2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4195   0.3967   0.4335   0.4732   0.5819  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.16745    0.87178   3.633  0.00028 ***
## TRNG_SCORE  -0.07386    0.06568  -1.125  0.26072    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 145.09  on 229  degrees of freedom
## Residual deviance: 143.80  on 228  degrees of freedom
##   (6 observations deleted due to missingness)
## AIC: 147.8
## 
## Number of Fisher Scoring iterations: 5
  summary(everinj.trng.emmeans.unadj)
##  TRNG_SCORE  prob     SE  df asymp.LCL asymp.UCL
##           5 0.943 0.0304 Inf     0.845     0.980
##           6 0.938 0.0290 Inf     0.851     0.976
##           7 0.934 0.0273 Inf     0.856     0.971
##           8 0.929 0.0255 Inf     0.860     0.966
##           9 0.924 0.0236 Inf     0.863     0.959
##          10 0.919 0.0217 Inf     0.865     0.953
##          11 0.913 0.0202 Inf     0.865     0.946
##          12 0.907 0.0195 Inf     0.862     0.939
##          13 0.901 0.0201 Inf     0.854     0.934
##          14 0.894 0.0225 Inf     0.841     0.931
##          15 0.887 0.0267 Inf     0.823     0.930
##          16 0.879 0.0325 Inf     0.800     0.930
##          17 0.871 0.0398 Inf     0.771     0.931
##          18 0.863 0.0483 Inf     0.739     0.933
##          19 0.854 0.0579 Inf     0.702     0.935
##          20 0.844 0.0687 Inf     0.661     0.938
##          21 0.834 0.0805 Inf     0.617     0.940
##          22 0.824 0.0933 Inf     0.570     0.943
##          23 0.813 0.1073 Inf     0.522     0.945
##          24 0.801 0.1222 Inf     0.473     0.948
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
  # 
  # 
  # summary(everinj.respect.glm.adj)
  # summary(everinj.respect.emmeans.adj, adjust="none")
  # contrast(everinj.respect.emmeans.adj, list("Respect1 - Respect2" = c(-1/2, 1/2, 0, 0, 0, 0, 0, 0, 0,
  #                                                                      -1/2, 1/2, 0 ,0, 0, 0, 0, 0, 0 )))
  # contrast(everinj.respect.emmeans.adj, list("Respect5 - Respect6" = c(0, 0, 0, 0, -1/2, 1/2, 0, 0, 0,
  #                                                                      0, 0 ,0, 0, -1/2, 1/2, 0, 0 ,0)))


Question 4: What happens if we attempt to compute a “Respect Score?” Gender differences?

Nine-Point Scale, higher score indicates higher level of Respect
  • Confident Asking Nurses (Yes=1, default=0)
  • Confident Asking Assistants (Yes=1, default=0)
  • Frequency Asking Nurses (Once=1, default=0)
  • Recognized by ES Staff (Yes=1, default=0)
  • Recognized by Anesthetists (Yes=1, default=0)
  • Recognized by GI Attending (Yes=1, default=0)
  • Pains dismissed as Growing Pain (Yes=0, default=1, REVERSE SCORED)
  • Teachers train with sensitivity to stature (Yes=1, default=0)
  • First Name Used No Permission (Yes=0, default=1, REVERSE SCORED)


# CREATE RESPECT INDEX 


 
  
  RESPECT <-
    
    SURVEY %>% 
    mutate( BIRTHSEX2 = case_when( BIRTHSEX == "M" ~ 0,
                                   TRUE ~ 1 ),
            
            HEIGHT = recode_factor(HEIGHT2, "< 5'"='1', 
                                   "5-5'3" = '2',
                                   "5'4-5'6" = '3',
                                   "5'7-5'9" = '4',
                                   "5'10-6'" = '5',
                                   "6'1-6'4" = '6',
                                   "> 6'4" = '7'),
            HEIGHT = relevel(HEIGHT, ref="5"),
            
            HTCAT = recode_factor(HEIGHT2, "< 5'"='0', 
                                  "5-5'3" = '0',
                                  "5'4-5'6" = '0',
                                  "5'7-5'9" = '1',
                                  "5'10-6'" = '1',
                                  "6'1-6'4" = '1',
                                  "> 6'4" = '1'),
            
            RESPECT_NURSES = case_when (is.na(COMFORTABLE_ASKING_NURSES) ~ NA_real_,
                                        COMFORTABLE_ASKING_NURSES == 'Y' ~ 1,
                                        TRUE ~ 0 ),
            
            RESPECT_TECHS = case_when (is.na(COMFORTABLE_ASKING_TECHS) ~ NA_real_,
                                       COMFORTABLE_ASKING_TECHS == "Y" ~ 1,
                                       TRUE ~ 0),
            
            RESPECT_NFREQ = case_when (is.na(NURSES_ASKING) ~ NA_real_,
                                       NURSES_ASKING == "Once" ~ 1,
                                       TRUE ~ 0),
            
            RESPECT_ESTAFF = case_when (is.na(RECOGNIZED_RESPECTED_ES_STAFF) ~ NA_real_,
                                        RECOGNIZED_RESPECTED_ES_STAFF == "Y" ~ 1,
                                        TRUE ~ 0),
            
            RESPECT_ANES = case_when (is.na(RECOGNIZED_RESPECTED_ANESTHETISTS) ~ NA_real_,
                                      RECOGNIZED_RESPECTED_ANESTHETISTS == "Y" ~ 1,
                                      TRUE ~ 0),
            
            RESPECT_GIATT = case_when (is.na(RECOGNIZED_RESPECTED_ANESTHETISTS) ~ NA_real_,
                                       RECOGNIZED_RESPECTED_GASTRO_ATTENDING == "Y" ~ 1,
                                       TRUE ~ 0),
            
            RESPECT_GPAINS = case_when (is.na(GROWING_PAINS) ~ NA_real_,
                                        GROWING_PAINS  == "Y" ~ 0,
                                        TRUE ~ 1),
            
            RESPECT_SENSITIVITY = case_when (is.na(TEACHER_SENSITIVITY_STATURE_HANDSIZE) ~ NA_real_,
                                             TEACHER_SENSITIVITY_STATURE_HANDSIZE  == "Y" ~ 1,
                                             TRUE ~ 0),
            
            
            RESPECT_FNAME =  case_when (is.na(FIRST_NAME_NO_PERMISSION) ~ NA_real_,
                                        FIRST_NAME_NO_PERMISSION == "Y" ~ 0,
                                        TRUE ~ 1)) %>% 
    
    
    rowwise() %>% 
    mutate(RESPECT_SCORE = sum( RESPECT_NURSES + RESPECT_TECHS + RESPECT_NFREQ + RESPECT_ESTAFF + RESPECT_ANES + 
                                  RESPECT_GIATT + RESPECT_GPAINS + RESPECT_SENSITIVITY + RESPECT_FNAME)) %>% 
    
    select(RECORD_ID, BIRTHSEX, BIRTHSEX2, AGE2, RACE, RACE2, HEIGHT, HTCAT, TRAINING_LEVEL, EVER_INJURED, starts_with("RESPECT_") )
  
  
  
  RESPECT %>% 
    group_by(BIRTHSEX) %>% 
    summarize( RSPECTMEAN = mean(RESPECT_SCORE, na.rm=T),
               RSPECTSD = sd(RESPECT_SCORE, na.rm=T))
## # A tibble: 2 × 3
##   BIRTHSEX RSPECTMEAN RSPECTSD
##   <fct>         <dbl>    <dbl>
## 1 F              6.38     1.78
## 2 M              7.08     1.35
shapiro.test(RESPECT$RESPECT_SCORE)
## 
##  Shapiro-Wilk normality test
## 
## data:  RESPECT$RESPECT_SCORE
## W = 0.88243, p-value = 2.431e-12
eov.ttest(RESPECT, RESPECT_SCORE, BIRTHSEX)
## [1] "F Test p.value = 0.003502347  EOV = FALSE (Satterthwaite)"
## [1] "RESPECT : RESPECT_SCORE ~ BIRTHSEX"
## 
##  Welch Two Sample t-test
## 
## data:  RESPECT : RESPECT_SCORE ~ BIRTHSEX
## t = -3.3138, df = 200.78, p-value = 0.001092
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -1.1146980 -0.2830084
## sample estimates:
## mean in group F mean in group M 
##        6.376147        7.075000
eov.ttest(RESPECT, RESPECT_SCORE, RACE2)
## [1] "F Test p.value = 0.6175523  EOV = TRUE (Pooled)"
## [1] "RESPECT : RESPECT_SCORE ~ RACE2"
## 
##  Two Sample t-test
## 
## data:  RESPECT : RESPECT_SCORE ~ RACE2
## t = 1.2879, df = 227, p-value = 0.1991
## alternative hypothesis: true difference in means between group WHITE and group NON-WHITE is not equal to 0
## 95 percent confidence interval:
##  -0.1462269  0.6980528
## sample estimates:
##     mean in group WHITE mean in group NON-WHITE 
##                6.898990                6.623077
ggbetweenstats( data= RESPECT,
                x = BIRTHSEX,
                y = RESPECT_SCORE,
                type="parametric",
                p.adjust.method = "none",
                title = "Mean RESPECT Scores by Birth Sex")

ggbetweenstats( data= RESPECT,
                x = RACE2,
                y = RESPECT_SCORE,
                type="parametric",
                p.adjust.method = "none",
                title = "Mean RESPECT Scores by Race (Broad)")

respect.birthsex.lm.unadj  <- lm( RESPECT_SCORE ~ BIRTHSEX , data= RESPECT)
respect.htcat.lm.unadj    <-  lm( RESPECT_SCORE ~ HTCAT , data= RESPECT)
respect.birthsex.lm.adj   <- lm( RESPECT_SCORE ~ BIRTHSEX + RACE2 , data= RESPECT)
      


  tab_model( respect.birthsex.lm.unadj,
             respect.birthsex.lm.adj,
             respect.htcat.lm.unadj,
             
             title= "Respect Score - Comparison of Three Models based on Quasi Score <br>.",
             dv.labels= c('Birth Sex Unadj','Height Cat Unadj', 'Birth Sex Adjusted Race'))
Respect Score - Comparison of Three Models based on Quasi Score
.
  Birth Sex Unadj Height Cat Unadj Birth Sex Adjusted Race
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 6.38 6.08 – 6.67 <0.001 6.45 6.03 – 6.87 <0.001 6.31 5.99 – 6.62 <0.001
BIRTHSEX [M] 0.70 0.29 – 1.11 0.001 0.67 0.25 – 1.10 0.002
RACE2 [NON-WHITE] -0.11 -0.53 – 0.32 0.623
HTCAT [1] 0.75 0.33 – 1.16 <0.001
Observations 229 229 229
R2 / R2 adjusted 0.047 / 0.043 0.048 / 0.040 0.053 / 0.048
# Can one's RESPECT score have an influence on Injury/Pain reports?
  
  
  
  RESPECT2 <-
      sqldf( "select a.*,
                    case when b.PAIN_SCORE = 0 then 0 else 1 end as EVER_INJ_PAIN
              from RESPECT as a left join PAIN as b on 
              a.RECORD_ID = b.RECORD_ID ")
  
  
  
  everinj.respect.glm.unadj <- glm( EVER_INJ_PAIN ~ RESPECT_SCORE  , data= RESPECT2 , family= binomial(link="logit"))
  everinj.respect.emmeans.unadj <- emmeans( everinj.respect.glm.unadj,  c(~RESPECT_SCORE), at = list(RESPECT_SCORE = c(1:9)), type= "response", adjust="none")
  
  
  everinj.respect.glm.adj <- glm( EVER_INJ_PAIN ~ RESPECT_SCORE + BIRTHSEX  , data= RESPECT2 , family= binomial(link="logit"))
  everinj.respect.emmeans.adj <- emmeans( everinj.respect.glm.adj,  ~c(RESPECT_SCORE, BIRTHSEX), at = list(RESPECT_SCORE = c(1:9)), type= "response", adjust="none")
  
  summary(everinj.respect.glm.unadj)
## 
## Call:
## glm(formula = EVER_INJ_PAIN ~ RESPECT_SCORE, family = binomial(link = "logit"), 
##     data = RESPECT2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7193   0.3506   0.4369   0.5418   0.6677  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     5.4997     1.4848   3.704 0.000212 ***
## RESPECT_SCORE  -0.4569     0.1972  -2.317 0.020510 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 144.89  on 228  degrees of freedom
## Residual deviance: 138.16  on 227  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 142.16
## 
## Number of Fisher Scoring iterations: 6
  summary(everinj.respect.glm.adj)
## 
## Call:
## glm(formula = EVER_INJ_PAIN ~ RESPECT_SCORE + BIRTHSEX, family = binomial(link = "logit"), 
##     data = RESPECT2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7612   0.3254   0.4024   0.4959   0.6957  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     5.5457     1.4955   3.708 0.000209 ***
## RESPECT_SCORE  -0.4390     0.2000  -2.195 0.028195 *  
## BIRTHSEXM      -0.2997     0.4747  -0.631 0.527825    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 144.89  on 228  degrees of freedom
## Residual deviance: 137.76  on 226  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 143.76
## 
## Number of Fisher Scoring iterations: 6
  summary(everinj.respect.emmeans.adj, adjust="none")
##  RESPECT_SCORE BIRTHSEX  prob      SE  df asymp.LCL asymp.UCL
##              1 F        0.994 0.00779 Inf     0.928     1.000
##              2 F        0.991 0.01026 Inf     0.923     0.999
##              3 F        0.986 0.01311 Inf     0.918     0.998
##              4 F        0.978 0.01616 Inf     0.911     0.995
##              5 F        0.966 0.01906 Inf     0.901     0.989
##              6 F        0.948 0.02188 Inf     0.884     0.978
##              7 F        0.922 0.02692 Inf     0.850     0.961
##              8 F        0.884 0.04111 Inf     0.777     0.944
##              9 F        0.831 0.07181 Inf     0.644     0.931
##              1 M        0.992 0.01080 Inf     0.898     0.999
##              2 M        0.987 0.01420 Inf     0.892     0.999
##              3 M        0.981 0.01809 Inf     0.886     0.997
##              4 M        0.970 0.02206 Inf     0.879     0.993
##              5 M        0.955 0.02534 Inf     0.870     0.985
##              6 M        0.932 0.02704 Inf     0.856     0.969
##              7 M        0.898 0.02834 Inf     0.827     0.942
##              8 M        0.850 0.03837 Inf     0.759     0.911
##              9 M        0.785 0.06868 Inf     0.622     0.890
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
  contrast(everinj.respect.emmeans.adj, list("Respect1 - Respect2" = c(-1/2, 1/2, 0, 0, 0, 0, 0, 0, 0,
                                                                       -1/2, 1/2, 0 ,0, 0, 0, 0, 0, 0 )))
##  contrast            odds.ratio    SE  df null z.ratio p.value
##  Respect1 / Respect2      0.645 0.129 Inf    1  -2.195  0.0282
## 
## Tests are performed on the log odds ratio scale
  contrast(everinj.respect.emmeans.adj, list("Respect5 - Respect6" = c(0, 0, 0, 0, -1/2, 1/2, 0, 0, 0,
                                                                       0, 0 ,0, 0, -1/2, 1/2, 0, 0 ,0)))
##  contrast            odds.ratio    SE  df null z.ratio p.value
##  Respect5 / Respect6      0.645 0.129 Inf    1  -2.195  0.0282
## 
## Tests are performed on the log odds ratio scale
  #CONCLUSION
  
  # For each increment a person improves on the RESPECT_SCORE scale, the exp. odds of incurring an injury or experiencing transient pain REDUCE by ~ 35% in the model adjusted for BIRTHSEX.


Question 5: What happens if we use Factor Analysis as a means of data reduction and to confirm that the variables that we intuitively assume belong together actually load highly on the same factor?

  • Pass along virtually all non-demographic variables, converting answers to a numeric scale
  • Examine correlation matrix to highlight pairs of overly correlated variables (PCoeff >0.90). Consider dropping one of the variables
  • Examine results of the Kaiser-Meyer-Olin Factor Adequacy Test. Consider dropping any variable with a score below 0.50.
  • Examine Eigen Value chart and decide on (1) number of factors to output; (2) whether to rotate your factor solution
  • Output Factor Scores and append to base dataset so that T-tests and regressions can be run by various categorical variables (Sex, Training Level, Agecat, etc.)



FACTOR_BASE <-
  
  SURVEY %>% 
  mutate(
          HTCAT = recode_factor(HEIGHT2, "< 5'"='0', 
                                "5-5'3" = '0',
                                "5'4-5'6" = '0',
                                "5'7-5'9" = '1',
                                "5'10-6'" = '1',
                                "6'1-6'4" = '1',
                                "> 6'4" = '1'),
       
          PAIN_INJURY =  case_when(  is.na(EVER_INJURED) ~ NA_real_,
                                     EVER_INJURED == "Y" ~ 1,
                                     TRUE ~ 0),
          
          PAIN_HAND =  case_when(  is.na(EXPERIENCED_TRANSIENT_PAIN_HAND) ~ NA_real_,
                                   EXPERIENCED_TRANSIENT_PAIN_HAND == "Y" ~ 1,
                                   TRUE ~ 0),        
          PAIN_NECKSH =  case_when(  is.na(EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER) ~ NA_real_,
                                     EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER == "Y" ~ 1,
                                   TRUE ~ 0),        
          PAIN_BACK =  case_when(  is.na(EXPERIENCED_TRANSIENT_PAIN_BACK) ~ NA_real_,
                                   EXPERIENCED_TRANSIENT_PAIN_BACK == "Y" ~ 1,
                                   TRUE ~ 0),        
          PAIN_LEG =  case_when(  is.na(EXPERIENCED_TRANSIENT_PAIN_LEG) ~ NA_real_,
                                  EXPERIENCED_TRANSIENT_PAIN_LEG == "Y" ~ 1,
                                   TRUE ~ 0),        
          PAIN_FOOT = case_when(  is.na(EXPERIENCED_TRANSIENT_PAIN_FOOT) ~ NA_real_,
                                     EXPERIENCED_TRANSIENT_PAIN_FOOT == "Y" ~ 1,
                                      TRUE ~ 0),
          
    
         # DEMO_HOURS = case_when (is.na(PERFORMANCE_HOURS) ~ NA_real_,
         #                          PERFORMANCE_HOURS == '< 10' ~ 1,
         #                          PERFORMANCE_HOURS == '10-20' ~ 2,
         #                          PERFORMANCE_HOURS == '21-30' ~ 3,
         #                          PERFORMANCE_HOURS == '31-40' ~ 4,
         #                          TRUE ~ 5 ),

        # DEMO_FEMTEACH = case_when (is.na(FEMALE_TRAINERS) ~ NA_real_,
        #                            FEMALE_TRAINERS == '1-2' ~ 1,
        #                            FEMALE_TRAINERS == '3-5' ~ 2,
        #                            FEMALE_TRAINERS == '6-10' ~ 3,
        #                            FEMALE_TRAINERS == '> 10' ~ 4,
        #                            TRUE ~ 0 ),
        # 
        # DEMO_MALETEACH = case_when (is.na(MALE_TRAINERS) ~ NA_real_,
        #                             MALE_TRAINERS == '1-2' ~ 1,
        #                             MALE_TRAINERS == '3-5' ~ 2,
        #                             MALE_TRAINERS == '6-10' ~ 3,
        #                             MALE_TRAINERS == '> 10' ~ 4,
        #                            TRUE ~ 0 ),
        # 
        #   DEMO_SGPREF = case_when (is.na(TEACHER_GENDER_PREFERENCE) ~ NA_real_,
        #                           TEACHER_GENDER_PREFERENCE == 'Yes' ~ 1,
        #                           TRUE ~ 0),

         # DEMO_GLOVESIZE = GLOVE,

          
          
        
          
          #Higher numbers equal higher levels of training/attention
          
          TRNG_FORMAL = case_when (is.na(FELLOWSHIP_FORMAL_ERGO_TRAINING) ~ NA_real_,
                                   FELLOWSHIP_FORMAL_ERGO_TRAINING == 'Y' ~ 1,
                                   TRUE ~ 0 ),
          
          
          TRNG_INFORMAL = case_when (is.na(INFORMAL_TRAINING) ~ NA_real_,
                                   INFORMAL_TRAINING == 'Y' ~ 1,
                                   TRUE ~ 0 ),
          
          
          TRNG_POSTURAL = case_when (is.na(TRAINING_TECHNIQUES_POSTURAL) ~ NA_real_,
                                     TRAINING_TECHNIQUES_POSTURAL == "Y" ~ 1,
                                     TRUE ~ 0),
          
          TRNG_BEDHT = case_when (is.na(TRAINING_TECHNIQUES_BEDHEIGHT) ~ NA_real_,
                                  TRAINING_TECHNIQUES_BEDHEIGHT == "Y" ~ 1,
                                  TRUE ~ 0),
          
          TRNG_BEDANG = case_when (is.na(TRAINING_TECHNIQUES_BEDANGLE) ~ NA_real_,
                                   TRAINING_TECHNIQUES_BEDANGLE == "Y" ~ 1,
                                   TRUE ~ 0),
          
          TRNG_MONITOR = case_when (is.na(TRAINING_TECHNIQUES_MONITORHEIGHT) ~ NA_real_,
                                    TRAINING_TECHNIQUES_MONITORHEIGHT == "Y" ~ 1,
                                    TRUE ~ 0),
          
          TRNG_MANEUVERS = case_when (is.na(TRAINING_TECHNIQUES_MUSCULOSKELETAL) ~ NA_real_,
                                      TRAINING_TECHNIQUES_MUSCULOSKELETAL == "Y" ~ 1,
                                      TRUE ~ 0) ,
          
          TRNG_EXERCISE = case_when (is.na(TRAINING_TECHNIQUES_EXERCISE_STRETCHING) ~ NA_real_,
                                     TRAINING_TECHNIQUES_EXERCISE_STRETCHING == "Y" ~ 1,
                                     TRUE ~ 0),
          
          TRNG_DIALEXT= case_when (is.na(TRAINING_TECHNIQUES_DIAL_EXTENDERS) ~ NA_real_,
                                   TRAINING_TECHNIQUES_DIAL_EXTENDERS == "Y" ~ 1,
                                   TRUE ~ 0),
          
          TRNG_PEDISCOPE= case_when (is.na(TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE) ~ NA_real_,
                                     TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE == "Y" ~ 1,
                                     TRUE ~ 0),
          
          TRNG_FEEDBACK= case_when (is.na(ERGO_FEEDBACK) ~ NA_real_,
                                    ERGO_FEEDBACK == "Often" ~ 3,
                                    ERGO_FEEDBACK == "Sometimes" ~ 2,
                                    ERGO_FEEDBACK == "Rarely" ~ 1,
                                    TRUE ~ 0),
          
          TRNG_OPTIM= case_when (is.na(ERGO_OPTIMIZATION) ~ NA_real_,
                                 ERGO_OPTIMIZATION == "Y" ~ 1,
                                 ERGO_OPTIMIZATION == "N" ~ 0,
                                 TRUE ~ 0),
          
          
          #Higher numbers equal heightened equipment availability
          
          EQUIP_GLOVE_AVAIL = case_when (is.na(GLOVE_SIZE_AVAILABLE) ~ NA_real_,
                                        GLOVE_SIZE_AVAILABLE == "Y" ~ 1,
                                        TRUE ~ 0),
          
          EQUIP_DIALEXT_AVAIL= case_when (is.na(DIAL_EXTENDERS_AVAILABLE) ~ NA_real_,
                                         DIAL_EXTENDERS_AVAILABLE == "Y" ~ 1,
                                         TRUE ~ 0),
          
          EQUIP_DIALEXT_ENC= case_when (is.na(DIAL_EXTENDERS_ENCOURAGED) ~ NA_real_,
                                       DIAL_EXTENDERS_ENCOURAGED == "Y" ~ 1,
                                       TRUE ~ 0),
          
          # EQUIP_PEDISCOPE_AVAIL = case_when (is.na(PEDI_COLONOSCOPES_AVAILABLE) ~ NA_real_,
          #                                   PEDI_COLONOSCOPES_AVAILABLE == "Y" ~ 1,
          #                                   TRUE ~ 0),
          
          EQUIP_LAPRON_AVAIL = case_when (is.na(LEAD_APRONS_AVAILABLE) ~ NA_real_,
                                          LEAD_APRONS_AVAILABLE == "Y" ~ 2,
                                          LEAD_APRONS_AVAILABLE == "N" ~ 1,
                                             TRUE ~ 0),
          # EQUIP_TO_FORMAL= case_when (is.na(ERGO_FORMAL_TIMEOUT_PRIOR) ~ NA_real_,
          #                            ERGO_FORMAL_TIMEOUT_PRIOR == "Y" ~ 1,
          #                            TRUE ~ 0),

          EQUIP_TO_INFORMAL= case_when (is.na(ERGO_INFORMAL_TIMEOUT_PRIOR) ~ NA_real_,
                                       ERGO_INFORMAL_TIMEOUT_PRIOR == "Y" ~ 1,
                                       TRUE ~ 0),
          
          EQUIP_MONITORS_EASYADJ = case_when (is.na(MONITORS_ADJUSTABLE) ~ NA_real_,
                                             MONITORS_ADJUSTABLE == "Y" ~ 1,
                                             TRUE ~ 0),
          
          
          TRNG_TACTILE_MALES = case_when (is.na(TACTILE_INSTRUCTION_FROM_MALES) ~ NA_real_,
                                          TACTILE_INSTRUCTION_FROM_MALES == "Often" ~ 2,
                                          TACTILE_INSTRUCTION_FROM_MALES == "Rarely" ~ 1,
                                          TRUE ~ 0),

          TRNG_TACTILE_FEMALES = case_when (is.na(TACTILE_INSTRUCTION_FROM_FEMALES) ~ NA_real_,
                                            TACTILE_INSTRUCTION_FROM_FEMALES == "Often" ~ 2,
                                            TACTILE_INSTRUCTION_FROM_FEMALES == "Rarely" ~ 1,
                                            TRUE ~ 0),

          
          
          # Higher numbers indicated heightened levels of respect
          
          RESPECT_NURSES = case_when (is.na(COMFORTABLE_ASKING_NURSES) ~ NA_real_,
                                      COMFORTABLE_ASKING_NURSES == 'Y' ~ 1,
                                      TRUE ~ 0 ),
          
          RESPECT_TECHS = case_when (is.na(COMFORTABLE_ASKING_TECHS) ~ NA_real_,
                                     COMFORTABLE_ASKING_TECHS == "Y" ~ 1,
                                     TRUE ~ 0),
          
          # RESPECT_NFREQ = case_when (is.na(NURSES_ASKING) ~ NA_real_,
          #                            NURSES_ASKING == "Once" ~ 2,
          #                            NURSES_ASKING == "Twice" ~ 1,
          #                            TRUE ~ 0),
          
          RESPECT_ESTAFF = case_when (is.na(RECOGNIZED_RESPECTED_ES_STAFF) ~ NA_real_,
                                      RECOGNIZED_RESPECTED_ES_STAFF == "Y" ~ 1,
                                      TRUE ~ 0),
          
          RESPECT_ANES = case_when (is.na(RECOGNIZED_RESPECTED_ANESTHETISTS) ~ NA_real_,
                                    RECOGNIZED_RESPECTED_ANESTHETISTS == "Y" ~ 1,
                                    TRUE ~ 0),
          
          RESPECT_GIATT = case_when (is.na(RECOGNIZED_RESPECTED_ANESTHETISTS) ~ NA_real_,
                                     RECOGNIZED_RESPECTED_GASTRO_ATTENDING == "Y" ~ 1,
                                     TRUE ~ 0),
          
          RESPECT_GPAINS = case_when (is.na(GROWING_PAINS) ~ NA_real_,
                                      GROWING_PAINS  == "Y" ~ 0,
                                      TRUE ~ 1),
          
          TRNG_SENSITIVITY = case_when (is.na(TEACHER_SENSITIVITY_STATURE_HANDSIZE) ~ NA_real_,
                                           TEACHER_SENSITIVITY_STATURE_HANDSIZE  == "Y" ~ 1,
                                           TRUE ~ 0),
          
          
          RESPECT_FNAME =  case_when (is.na(FIRST_NAME_NO_PERMISSION) ~ NA_real_,
                                      FIRST_NAME_NO_PERMISSION == "Y" ~ 0,
                                      TRUE ~ 1))  %>% 
  
  
  #Mean Substitute missing values FOR EACH SEX
# 
# group_by( BIRTHSEX, RACE2, TRAINING_LEVEL ) %>%
# mutate_at(vars( DEMO_HOURS:RESPECT_FNAME), ~replace_na(., mean(., na.rm = TRUE))) %>%

  
  select( RECORD_ID, BIRTHSEX, RACE2, TRAINING_LEVEL, AGE2, HEIGHT2, HTCAT, 
          starts_with('DEMO_'), starts_with('TRNG_'),  starts_with('EQUIP_'), starts_with('RESPECT_'), starts_with('PAIN_')) %>% 
  
  na.omit()  %>%   left_join(CORRECT[, c("RECORD_ID", "COUNT_CORRECT")], by= 'RECORD_ID')  %>% rename( ERGO_QUIZ = COUNT_CORRECT)  %>%
                   left_join(APRON_SCORE[, c("RECORD_ID", "APRON_SCORE")], by= 'RECORD_ID')  %>% rename( EQUIP_APRONS_CT = APRON_SCORE)


Here’s a glimpse of the structure of the resulting dataset FACTOR_BASE:

glimpse(FACTOR_BASE)
## Rows: 225
## Columns: 43
## $ RECORD_ID              <dbl> 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15,…
## $ BIRTHSEX               <fct> F, F, F, M, F, F, F, F, M, M, F, F, F, M, M, F,…
## $ RACE2                  <fct> NON-WHITE, WHITE, WHITE, NON-WHITE, NON-WHITE, …
## $ TRAINING_LEVEL         <ord> Advanced, Advanced, First Year, Advanced, Secon…
## $ AGE2                   <fct> 30-34, 30-34, 30-34, 30-34, 30-34, 30-34, 30-34…
## $ HEIGHT2                <fct> 5'4-5'6, 5'4-5'6, 5'4-5'6, 6'1-6'4, 5'7-5'9, 5-…
## $ HTCAT                  <fct> 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,…
## $ TRNG_FORMAL            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,…
## $ TRNG_INFORMAL          <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,…
## $ TRNG_POSTURAL          <dbl> 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,…
## $ TRNG_BEDHT             <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ TRNG_BEDANG            <dbl> 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,…
## $ TRNG_MONITOR           <dbl> 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1,…
## $ TRNG_MANEUVERS         <dbl> 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1,…
## $ TRNG_EXERCISE          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ TRNG_DIALEXT           <dbl> 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ TRNG_PEDISCOPE         <dbl> 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1,…
## $ TRNG_FEEDBACK          <dbl> 2, 1, 2, 1, 1, 2, 2, 2, 2, 1, 3, 2, 1, 1, 1, 2,…
## $ TRNG_OPTIM             <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0,…
## $ TRNG_TACTILE_MALES     <dbl> 2, 0, 0, 0, 0, 0, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1,…
## $ TRNG_TACTILE_FEMALES   <dbl> 0, 0, 0, 1, 1, 0, 2, 2, 2, 0, 1, 1, 0, 1, 1, 2,…
## $ TRNG_SENSITIVITY       <dbl> 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1,…
## $ EQUIP_GLOVE_AVAIL      <dbl> 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,…
## $ EQUIP_DIALEXT_AVAIL    <dbl> 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ EQUIP_DIALEXT_ENC      <dbl> 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ EQUIP_LAPRON_AVAIL     <dbl> 2, 2, 0, 2, 0, 1, 0, 1, 2, 2, 0, 0, 1, 2, 2, 2,…
## $ EQUIP_TO_INFORMAL      <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,…
## $ EQUIP_MONITORS_EASYADJ <dbl> 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,…
## $ RESPECT_NURSES         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,…
## $ RESPECT_TECHS          <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,…
## $ RESPECT_ESTAFF         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,…
## $ RESPECT_ANES           <dbl> 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,…
## $ RESPECT_GIATT          <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ RESPECT_GPAINS         <dbl> 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1,…
## $ RESPECT_FNAME          <dbl> 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1,…
## $ PAIN_INJURY            <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ PAIN_HAND              <dbl> 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,…
## $ PAIN_NECKSH            <dbl> 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,…
## $ PAIN_BACK              <dbl> 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1,…
## $ PAIN_LEG               <dbl> 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
## $ PAIN_FOOT              <dbl> 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,…
## $ ERGO_QUIZ              <dbl> 0, 4, 4, 3, 3, 3, 3, 5, 4, 1, 3, 5, 3, 2, 2, 5,…
## $ EQUIP_APRONS_CT        <dbl> 1, 2, 0, 5, 0, 4, 0, 4, 2, 5, 0, 0, 1, 0, 0, 4,…



correlations <- round(cor(FACTOR_BASE[, -c(1:7)]),2)
corr_matrix <- as.data.frame(as.table(correlations))
corr_matrix %>% arrange(desc(Freq)) %>% rename(PCoeff= Freq) %>% filter(PCoeff !=1 & abs(PCoeff) > 0.90)
## [1] Var1   Var2   PCoeff
## <0 rows> (or 0-length row.names)
X <- na.omit(FACTOR_BASE[, -c(1:7)])

# summary(lm( ERGO_QUIZ ~ ., data=X ))
# vif(lm( ERGO_QUIZ ~ ., X))


#Kaiser-Meyer-Olkin factor adequacy
# Variables < 0.50 should be eliminated from analysis
# Score north of 0.70 is ideal; above 0.60 is acceptable
  
KMO( X)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = X)
## Overall MSA =  0.66
## MSA for each item = 
##            TRNG_FORMAL          TRNG_INFORMAL          TRNG_POSTURAL 
##                   0.63                   0.71                   0.59 
##             TRNG_BEDHT            TRNG_BEDANG           TRNG_MONITOR 
##                   0.69                   0.85                   0.70 
##         TRNG_MANEUVERS          TRNG_EXERCISE           TRNG_DIALEXT 
##                   0.77                   0.71                   0.65 
##         TRNG_PEDISCOPE          TRNG_FEEDBACK             TRNG_OPTIM 
##                   0.69                   0.80                   0.71 
##     TRNG_TACTILE_MALES   TRNG_TACTILE_FEMALES       TRNG_SENSITIVITY 
##                   0.55                   0.54                   0.70 
##      EQUIP_GLOVE_AVAIL    EQUIP_DIALEXT_AVAIL      EQUIP_DIALEXT_ENC 
##                   0.61                   0.61                   0.64 
##     EQUIP_LAPRON_AVAIL      EQUIP_TO_INFORMAL EQUIP_MONITORS_EASYADJ 
##                   0.57                   0.65                   0.63 
##         RESPECT_NURSES          RESPECT_TECHS         RESPECT_ESTAFF 
##                   0.65                   0.65                   0.67 
##           RESPECT_ANES          RESPECT_GIATT         RESPECT_GPAINS 
##                   0.74                   0.66                   0.66 
##          RESPECT_FNAME            PAIN_INJURY              PAIN_HAND 
##                   0.80                   0.62                   0.59 
##            PAIN_NECKSH              PAIN_BACK               PAIN_LEG 
##                   0.75                   0.57                   0.65 
##              PAIN_FOOT              ERGO_QUIZ        EQUIP_APRONS_CT 
##                   0.63                   0.52                   0.63
remove<- colnames(X[ , KMO(X)$MSAi<0.5])
print(remove)
## character(0)
# Bartlett's Test for Sphericity - If you can't reject this null hypothesis, then there is essentially nothing to factor, as all variables are essentially different. 
# In other words, you want the analysis of your data matrix to have a resultant p value less than 0.05

psych::cortest.bartlett( X )
## R was not square, finding R from data
## $chisq
## [1] 1986.178
## 
## $p.value
## [1] 4.081542e-140
## 
## $df
## [1] 630


  • Conclusion: Questions that were targeted only to a subset of respondents (e.g., items around Pregnancy) were eliminated from the final FACTOR_BASE dataset. While some varialbes exhibited a high degree of correlation, none exceeded the 0.90 threshold. However, results of the Kaiser-Meyer-Olin test (KMO) suggested that the following variables should not be part of the factor analysis, as each had a KMO score below 0.50: Availability of *Pedioscopes, Formal Ergo Timeouts, Times Needed to Get Nurses’ Attention,

  • The final FACTOR_BASE dataset contains 36 Questions related to Training, Equipment Availability, Pain/Injury Sustained, as well as two composite variables: ERGO_QUIZ (a variable capturing the number of questions each repsonded answered correctly in the Ergonomic Knowledge section of the survey); and (2) EQUIP_APRON_CT (a variable capturing the number of specialty aprons reponsdents indicated were available at their institutions).

# What does R recommend for the baseline number of Factors?
fa.parallel(X)

## Parallel analysis suggests that the number of factors =  8  and the number of components =  5
ev <- eigen(cor(X))
ev$values
##  [1] 4.2456923 2.7125913 2.4270167 1.9331420 1.8700064 1.5005781 1.4615129
##  [8] 1.4064787 1.2625193 1.2176464 1.1372209 1.0339807 0.9829390 0.9187299
## [15] 0.8923378 0.8366634 0.7860185 0.7824113 0.7365471 0.7115283 0.6865098
## [22] 0.6401780 0.6141085 0.5906948 0.5791590 0.5417329 0.4956253 0.4879920
## [29] 0.4335152 0.4075511 0.3868419 0.3430471 0.3178170 0.2781951 0.2193328
## [36] 0.1221385
nfactors <- rep(1:36)

Eigen_Values <- ev$values
Scree <- data.frame(nfactors, Eigen_Values)

ggplot(data = Scree, mapping = aes(x = nfactors, y = Eigen_Values)) +
  geom_point() +
  geom_line() +
  scale_y_continuous(name = "Eigen Values", limits = c(0, max(ev$values+0.5))) +
  scale_x_continuous(name= "Number of Factors", breaks = scales::breaks_width(1))+
  geom_vline(xintercept= 5, linetype='solid', col = 'red')+
  geom_vline(xintercept= 9, linetype='solid', col = 'darkred')+
  theme_bw() +
  theme(panel.grid.major.y = element_line(color = "darkgrey")) +
  ggtitle("Scree Plot")


  • Conclusion: Although R recommends using nine factors, we can run multiple factor analyses to find the number that (1) explains the highest percentage of variance in our data and, even more important, (2) is easily interpretable. Nine factors may be the default, but the Scree plot makes exploring options with 5 to 8 factors reasonable. Also the first 12 factors have Eigen Values above 1.0, so exploring more factors may make sense. (More factors may make interpreting the loadings more difficult, however.)



FACTANAL.NOROT <- factanal( X, factors=5, scores = c("regression"), rotation = "none", cutoff=0.3)
print(FACTANAL.NOROT$loadings,sort=TRUE, cutoff = .35)
## 
## Loadings:
##                        Factor1 Factor2 Factor3 Factor4 Factor5
## RESPECT_NURSES          0.818                                 
## RESPECT_TECHS           0.705                                 
## RESPECT_ESTAFF          0.588                                 
## TRNG_TACTILE_MALES              0.906                         
## TRNG_TACTILE_FEMALES            0.896                         
## TRNG_INFORMAL                           0.507                 
## TRNG_DIALEXT                                    0.666         
## EQUIP_DIALEXT_AVAIL                             0.770         
## EQUIP_DIALEXT_ENC                               0.649         
## TRNG_FORMAL                                                   
## TRNG_POSTURAL                           0.463                 
## TRNG_BEDHT                                                    
## TRNG_BEDANG                                                   
## TRNG_MONITOR                            0.430                 
## TRNG_MANEUVERS                                                
## TRNG_EXERCISE                                                 
## TRNG_PEDISCOPE                                                
## TRNG_FEEDBACK                   0.414   0.367                 
## TRNG_OPTIM                                                    
## TRNG_SENSITIVITY                        0.393                 
## EQUIP_GLOVE_AVAIL                                             
## EQUIP_LAPRON_AVAIL                                            
## EQUIP_TO_INFORMAL                                             
## EQUIP_MONITORS_EASYADJ                                        
## RESPECT_ANES            0.360                                 
## RESPECT_GIATT                                                 
## RESPECT_GPAINS                                                
## RESPECT_FNAME           0.356                                 
## PAIN_INJURY                                                   
## PAIN_HAND                                                     
## PAIN_NECKSH                                             0.476 
## PAIN_BACK                                               0.353 
## PAIN_LEG                                                0.465 
## PAIN_FOOT                                                     
## ERGO_QUIZ                                                     
## EQUIP_APRONS_CT                                               
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5
## SS loadings      2.626   2.404   1.975   1.900   1.365
## Proportion Var   0.073   0.067   0.055   0.053   0.038
## Cumulative Var   0.073   0.140   0.195   0.247   0.285
print(FACTANAL.NOROT$PVAL)
##    objective 
## 5.818536e-08
FACTANAL.VARIMAX <- factanal( X, factors=7, scores = c("regression"), rotation = "varimax", cutoff=0.3)
print(FACTANAL.VARIMAX$loadings,sort=TRUE, cutoff = .35)
## 
## Loadings:
##                        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7
## TRNG_INFORMAL           0.623                                                 
## TRNG_POSTURAL           0.520                                                 
## TRNG_TACTILE_MALES              0.950                                         
## TRNG_TACTILE_FEMALES            0.878                                         
## TRNG_DIALEXT                            0.696                                 
## EQUIP_DIALEXT_AVAIL                     0.794                                 
## EQUIP_DIALEXT_ENC                       0.672                                 
## RESPECT_NURSES                                  0.751                         
## RESPECT_TECHS                                   0.841                         
## PAIN_NECKSH                                             0.533                 
## PAIN_LEG                                                0.674                 
## RESPECT_ESTAFF                                                  0.958         
## TRNG_FORMAL                                                                   
## TRNG_BEDHT              0.425                                                 
## TRNG_BEDANG                                                                   
## TRNG_MONITOR            0.489                                                 
## TRNG_MANEUVERS          0.414                                                 
## TRNG_EXERCISE                                                                 
## TRNG_PEDISCOPE                                                                
## TRNG_FEEDBACK           0.388                                                 
## TRNG_OPTIM                                                              0.387 
## TRNG_SENSITIVITY                                                        0.451 
## EQUIP_GLOVE_AVAIL                                                       0.370 
## EQUIP_LAPRON_AVAIL                                                            
## EQUIP_TO_INFORMAL                                                             
## EQUIP_MONITORS_EASYADJ                                                        
## RESPECT_ANES                                                    0.379         
## RESPECT_GIATT                                                                 
## RESPECT_GPAINS                                                                
## RESPECT_FNAME                                                                 
## PAIN_INJURY                                                                   
## PAIN_HAND                                                                     
## PAIN_BACK                                               0.387                 
## PAIN_FOOT                                               0.416                 
## ERGO_QUIZ                                                                     
## EQUIP_APRONS_CT                                                               
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7
## SS loadings      2.226   1.992   1.858   1.591   1.583   1.581   1.289
## Proportion Var   0.062   0.055   0.052   0.044   0.044   0.044   0.036
## Cumulative Var   0.062   0.117   0.169   0.213   0.257   0.301   0.337
print(FACTANAL.VARIMAX$PVAL)
##   objective 
## 0.001352721


  • Conclusion: Here are two sample factor analyses: one that is unrotated based on a 5-factor solution that explains 29% of the variance; and another that uses a Varimax rotation based on a 7-factor solution that explains 35% of the variance. Since our variables did not display a high degree of pairwise correlation, I have opted to use the unrotated solution. Although it explains less of the data’s variance, I prefer that it loads items related to Respect on the first factor, an area of keen interest in this study.



FACTANAL.NOROT5 <- cbind( FACTOR_BASE, FACTANAL.NOROT$scores ) %>%
  rename( F1.RESPECT = Factor1,
          F2.TACTTRNG = Factor2,
          F3.ERGOTRNG = Factor3,
          F4.DIALEXT = Factor4,
          F5.PAIN = Factor5) %>% 
          mutate(TRAINING_LEVEL =  factor(TRAINING_LEVEL, ordered = F),
                 TRAINING_LEVEL = relevel(TRAINING_LEVEL, ref= "Advanced"))




# Confirm factors are totally uncorrelated
pairs.panels( FACTANAL.NOROT5[, c(44:48)] , pch=21, stars=T) 

#Test Factor Results to see whether there are any gender differences


eov.ttest( FACTANAL.NOROT5, F1.RESPECT, BIRTHSEX)
## [1] "F Test p.value = 0.007439645  EOV = FALSE (Satterthwaite)"
## [1] "FACTANAL.NOROT5 : F1.RESPECT ~ BIRTHSEX"
## 
##  Welch Two Sample t-test
## 
## data:  FACTANAL.NOROT5 : F1.RESPECT ~ BIRTHSEX
## t = -2.4471, df = 197.26, p-value = 0.01528
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -0.55220503 -0.05936125
## sample estimates:
## mean in group F mean in group M 
##      -0.1617253       0.1440578
eov.ttest( FACTANAL.NOROT5, F2.TACTTRNG, BIRTHSEX)
## [1] "F Test p.value = 0.7882628  EOV = TRUE (Pooled)"
## [1] "FACTANAL.NOROT5 : F2.TACTTRNG ~ BIRTHSEX"
## 
##  Two Sample t-test
## 
## data:  FACTANAL.NOROT5 : F2.TACTTRNG ~ BIRTHSEX
## t = 0.85425, df = 223, p-value = 0.3939
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -0.1432977  0.3625945
## sample estimates:
## mean in group F mean in group M 
##      0.05799181     -0.05165657
eov.ttest( FACTANAL.NOROT5, F3.ERGOTRNG, BIRTHSEX)
## [1] "F Test p.value = 0.8494288  EOV = TRUE (Pooled)"
## [1] "FACTANAL.NOROT5 : F3.ERGOTRNG ~ BIRTHSEX"
## 
##  Two Sample t-test
## 
## data:  FACTANAL.NOROT5 : F3.ERGOTRNG ~ BIRTHSEX
## t = -1.0164, df = 223, p-value = 0.3105
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -0.3454330  0.1103522
## sample estimates:
## mean in group F mean in group M 
##     -0.06216581      0.05537459
eov.ttest( FACTANAL.NOROT5, F4.DIALEXT, BIRTHSEX)
## [1] "F Test p.value = 0.0003032605  EOV = FALSE (Satterthwaite)"
## [1] "FACTANAL.NOROT5 : F4.DIALEXT ~ BIRTHSEX"
## 
##  Welch Two Sample t-test
## 
## data:  FACTANAL.NOROT5 : F4.DIALEXT ~ BIRTHSEX
## t = 2.562, df = 186.73, p-value = 0.0112
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  0.07071229 0.54417992
## sample estimates:
## mean in group F mean in group M 
##       0.1626048      -0.1448413
eov.ttest( FACTANAL.NOROT5, F5.PAIN, BIRTHSEX)
## [1] "F Test p.value = 0.9740426  EOV = TRUE (Pooled)"
## [1] "FACTANAL.NOROT5 : F5.PAIN ~ BIRTHSEX"
## 
##  Two Sample t-test
## 
## data:  FACTANAL.NOROT5 : F5.PAIN ~ BIRTHSEX
## t = 2.8578, df = 223, p-value = 0.004669
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  0.09427052 0.51306934
## sample estimates:
## mean in group F mean in group M 
##       0.1606077      -0.1430623
Y <-
  FACTANAL.NOROT5 %>% 
  pivot_longer( cols= F1.RESPECT : F5.PAIN,
                names_to = "FACTOR",
                values_to = "FACTOR.SCORES")  %>% select( RECORD_ID, BIRTHSEX:HTCAT, FACTOR, FACTOR.SCORES)


grouped_ggbetweenstats(
  data = Y,
  x = BIRTHSEX,
  y = FACTOR.SCORES,
  grouping.var = FACTOR,)

#Simple Linear Regressions to see whether significance survives controlling for Race (binary values) & Training Level

fnorot5.lm1 <- lm(F1.RESPECT ~ BIRTHSEX + TRAINING_LEVEL + RACE2, data = FACTANAL.NOROT5 )
fnorot5.lm4 <- lm(F4.DIALEXT ~ BIRTHSEX + TRAINING_LEVEL + RACE2 , data = FACTANAL.NOROT5)
fnorot5.lm5 <- lm(F5.PAIN ~ BIRTHSEX + TRAINING_LEVEL + RACE2, data = FACTANAL.NOROT5)
fnorot5.lm1h <- lm( F1.RESPECT ~ HTCAT + TRAINING_LEVEL + RACE2 , data = FACTANAL.NOROT5 )

tab_model( fnorot5.lm1,
           fnorot5.lm4,
           fnorot5.lm5,
           fnorot5.lm1h, show.ci=F, show.se=T)
  F1.RESPECT F4.DIALEXT F5.PAIN F1.RESPECT
Predictors Estimates std. Error p Estimates std. Error p Estimates std. Error p Estimates std. Error p
(Intercept) 0.05 0.16 0.745 -0.01 0.15 0.934 0.20 0.14 0.132 0.04 0.16 0.794
BIRTHSEX [M] 0.26 0.13 0.043 -0.27 0.12 0.030 -0.30 0.11 0.006
TRAINING LEVEL [First
Year]
-0.34 0.15 0.023 0.13 0.14 0.354 -0.28 0.13 0.031 -0.37 0.15 0.014
TRAINING LEVEL [Second
Year]
-0.19 0.15 0.216 0.23 0.15 0.118 -0.05 0.13 0.723 -0.17 0.15 0.267
RACE2 [NON-WHITE] -0.02 0.13 0.860 0.06 0.12 0.613 0.11 0.11 0.298 -0.03 0.13 0.784
HTCAT [1] 0.26 0.13 0.043
Observations 225 225 225 225
R2 / R2 adjusted 0.050 / 0.033 0.042 / 0.024 0.062 / 0.045 0.050 / 0.033


  • Conclusion: Three of our five factors exhibited significant differences along the BIRTHSEX variable: Factor1-Respect, Factor4-DialExt, and Factor5-Pain/Injury. Moreover, when modeled in simple linear regressions and controlled for both RACE and TRAINING_LEVEL, each factor remained significant above the p= 0.05 level. (Factor5-Pain/Injury was most significant at p= 0.009.)

  • Another interesting finding here is that the HTCAT variable (which divides the sample into those above and below the 5’7” cutoff) behaves almost identically on the Respect factor to Birthsex, which is a result we’ve seen elsewhere. This could further bolster the claim that when it comes to feelings of disrespect in the Endoscopy Suite, height may be just as important a consideration as sex.



#Test Each factor for consistency



f1 <- X[ , c("RESPECT_NURSES", "RESPECT_TECHS", "RESPECT_ESTAFF", "RESPECT_ANES", "RESPECT_FNAME")]
f1a<- alpha(f1, check.keys=T)
alpha(f1, check.keys=T)
## 
## Reliability analysis   
## Call: alpha(x = f1, check.keys = T)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##        0.7      0.73    0.74      0.36 2.8 0.032 0.85 0.24     0.28
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.64   0.7  0.76
## Duhachek  0.64   0.7  0.76
## 
##  Reliability if an item is dropped:
##                raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## RESPECT_NURSES      0.61      0.64    0.58      0.31 1.8    0.041 0.0059  0.28
## RESPECT_TECHS       0.63      0.66    0.62      0.33 2.0    0.041 0.0141  0.28
## RESPECT_ESTAFF      0.62      0.66    0.65      0.32 1.9    0.042 0.0348  0.26
## RESPECT_ANES        0.68      0.72    0.71      0.39 2.6    0.036 0.0356  0.33
## RESPECT_FNAME       0.73      0.75    0.74      0.43 3.0    0.031 0.0287  0.40
## 
##  Item statistics 
##                  n raw.r std.r r.cor r.drop mean   sd
## RESPECT_NURSES 225  0.73  0.77  0.76   0.56 0.89 0.31
## RESPECT_TECHS  225  0.70  0.74  0.70   0.53 0.89 0.31
## RESPECT_ESTAFF 225  0.71  0.75  0.67   0.57 0.92 0.27
## RESPECT_ANES   225  0.67  0.64  0.48   0.41 0.80 0.40
## RESPECT_FNAME  225  0.65  0.58  0.38   0.34 0.72 0.45
## 
## Non missing response frequency for each item
##                   0    1 miss
## RESPECT_NURSES 0.11 0.89    0
## RESPECT_TECHS  0.11 0.89    0
## RESPECT_ESTAFF 0.08 0.92    0
## RESPECT_ANES   0.20 0.80    0
## RESPECT_FNAME  0.28 0.72    0
f1a$total$std.alph
## [1] 0.7344984
f2 <- X[ , c("TRNG_TACTILE_MALES", "TRNG_TACTILE_FEMALES", "TRNG_FEEDBACK")]
f2a<- alpha(f2, check.keys=T)

f3 <- X[ , c("TRNG_INFORMAL","TRNG_POSTURAL","TRNG_MONITOR", "TRNG_SENSITIVITY")]
f3a<- alpha(f3, check.keys=T)


f4 <- X[ , c("TRNG_DIALEXT","EQUIP_DIALEXT_AVAIL","EQUIP_DIALEXT_ENC")]
f4a<- alpha(f4, check.keys=T)

f5 <- X[ , c("PAIN_NECKSH","PAIN_LEG", "PAIN_BACK")]
f5a<- alpha(f5, check.keys=T)



ta<- alpha(X, check.keys=T)


# Cronbach's Alphas for all five factors and for the overall dataset

rbind(F1.Respect.alpha=f1a$total$std.alpha, 
      F2.TactTrng.alpha=f2a$total$std.alpha, 
      F3.ErgoTrng.alpha=f3a$total$std.alpha,
      F4.DialExt.alpha=f4a$total$std.alpha, 
      F5.Pain.alpha=f5a$total$std.alpha, 
      Total.alpha= ta$total$std.alpha)
##                        [,1]
## F1.Respect.alpha  0.7344984
## F2.TactTrng.alpha 0.7505630
## F3.ErgoTrng.alpha 0.5201925
## F4.DialExt.alpha  0.7595929
## F5.Pain.alpha     0.5514775
## Total.alpha       0.7578145



  • Conclusion: Overall, our factors have a Standardized Cronbach’s Alpha of 0.761, which is considered very good. Three of our factors – F1.Respect, F2.TactileTraining, F4.DialExtenders – also have CA’s above 0.70. However, two factors – F3.ErgoTraining and F5.Pain/Injury – have CA’s of 0.52 and 0.55, respectively. Removing variables from the factor do not raise the CA’s appreciably, according to the function output (not displayed). Is this cause for concern?



  • Here’s what a 9-Factor Varimax Rotated Factor Solution produces. (Remember, this was the default number of factors suggested by R.)


FACTANAL.VARIMAX <- factanal( X, factors=9, scores = c("regression"), rotation = "varimax", cutoff=0.3)
print(FACTANAL.VARIMAX$loadings,sort=TRUE, cutoff = .3)
## 
## Loadings:
##                        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7
## TRNG_INFORMAL           0.588                                                 
## TRNG_POSTURAL           0.596                                                 
## TRNG_TACTILE_MALES              0.939                                         
## TRNG_TACTILE_FEMALES            0.877                                         
## TRNG_DIALEXT                            0.682                                 
## EQUIP_DIALEXT_AVAIL                     0.799                                 
## EQUIP_DIALEXT_ENC                       0.672                                 
## RESPECT_NURSES                                  0.941                         
## RESPECT_TECHS                                   0.690                         
## PAIN_NECKSH                                             0.527                 
## PAIN_LEG                                                0.704                 
## RESPECT_ESTAFF                                  0.416           0.556         
## RESPECT_ANES                                                    0.604         
## TRNG_MONITOR            0.418                                                 
## EQUIP_MONITORS_EASYADJ                                                        
## EQUIP_LAPRON_AVAIL                                                            
## EQUIP_APRONS_CT                                                               
## TRNG_FORMAL                                                                   
## TRNG_BEDHT              0.427                                                 
## TRNG_BEDANG             0.326                                           0.302 
## TRNG_MANEUVERS          0.415                                                 
## TRNG_EXERCISE                                                                 
## TRNG_PEDISCOPE                                                                
## TRNG_FEEDBACK           0.474   0.318                                         
## TRNG_OPTIM                                                              0.342 
## TRNG_SENSITIVITY                                                        0.376 
## EQUIP_GLOVE_AVAIL                                                       0.379 
## EQUIP_TO_INFORMAL       0.345                                                 
## RESPECT_GIATT                                                   0.488         
## RESPECT_GPAINS                                                          0.387 
## RESPECT_FNAME                                                   0.363         
## PAIN_INJURY                                                                   
## PAIN_HAND                                                                     
## PAIN_BACK                                               0.390                 
## PAIN_FOOT                                               0.408                 
## ERGO_QUIZ                                                                     
##                        Factor8 Factor9
## TRNG_INFORMAL                         
## TRNG_POSTURAL                         
## TRNG_TACTILE_MALES                    
## TRNG_TACTILE_FEMALES                  
## TRNG_DIALEXT                          
## EQUIP_DIALEXT_AVAIL                   
## EQUIP_DIALEXT_ENC                     
## RESPECT_NURSES                        
## RESPECT_TECHS                         
## PAIN_NECKSH                           
## PAIN_LEG                              
## RESPECT_ESTAFF          0.367         
## RESPECT_ANES                          
## TRNG_MONITOR            0.537         
## EQUIP_MONITORS_EASYADJ  0.627         
## EQUIP_LAPRON_AVAIL              0.572 
## EQUIP_APRONS_CT                 0.567 
## TRNG_FORMAL                           
## TRNG_BEDHT                            
## TRNG_BEDANG                           
## TRNG_MANEUVERS                        
## TRNG_EXERCISE                         
## TRNG_PEDISCOPE                        
## TRNG_FEEDBACK                         
## TRNG_OPTIM                            
## TRNG_SENSITIVITY                      
## EQUIP_GLOVE_AVAIL                     
## EQUIP_TO_INFORMAL                     
## RESPECT_GIATT                         
## RESPECT_GPAINS                        
## RESPECT_FNAME                         
## PAIN_INJURY                           
## PAIN_HAND                             
## PAIN_BACK                             
## PAIN_FOOT                             
## ERGO_QUIZ                       0.367 
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7 Factor8
## SS loadings      2.189   1.950   1.858   1.720   1.517   1.312   1.142   1.126
## Proportion Var   0.061   0.054   0.052   0.048   0.042   0.036   0.032   0.031
## Cumulative Var   0.061   0.115   0.167   0.214   0.257   0.293   0.325   0.356
##                Factor9
## SS loadings      1.021
## Proportion Var   0.028
## Cumulative Var   0.384
print(FACTANAL.VARIMAX$PVAL)
## objective 
## 0.2448795
FACTANAL.VARIMAX9 <- cbind( FACTOR_BASE, FACTANAL.VARIMAX$scores ) %>% 
  rename( F1.ERGOTRNG = Factor1,
          F2.TACTTRNG = Factor2,
          F3.DIAL.EXT = Factor3,
          F4.RESPECT.JRSTAFF = Factor4,
          F5.PAIN = Factor5,
          F6.RESPECT.SRSTAFF = Factor6,
          F7.APRONS.FIT.AVAIL = Factor7,
          F8.TRNG.SENSITIVITY = Factor8,
          F9.MONITORS = Factor9) %>% 
          mutate(TRAINING_LEVEL =  factor(TRAINING_LEVEL, ordered = F),
                 TRAINING_LEVEL = relevel(TRAINING_LEVEL, ref= "Advanced"))


Y <-
  FACTANAL.VARIMAX9 %>% 
  pivot_longer( cols= F1.ERGOTRNG : F9.MONITORS,
                names_to = "FACTOR",
                values_to = "FACTOR.SCORES")  %>% select( RECORD_ID, BIRTHSEX:HTCAT, FACTOR, FACTOR.SCORES)


grouped_ggbetweenstats(
  data = Y,
  x = BIRTHSEX,
  y = FACTOR.SCORES,
  grouping.var = FACTOR )

#Simple Linear Regressions to see whether significance survives controlling for Race (binary values) & Training Level

frot9.lm5 <- lm(F5.PAIN ~ BIRTHSEX + TRAINING_LEVEL + RACE2, data = FACTANAL.VARIMAX9 )
frot9.lm6 <- lm(F6.RESPECT.SRSTAFF ~ BIRTHSEX + TRAINING_LEVEL + RACE2 , data = FACTANAL.VARIMAX9)
frot9.lm7 <- lm(F7.APRONS.FIT.AVAIL ~ BIRTHSEX + TRAINING_LEVEL + RACE2, data = FACTANAL.VARIMAX9)
frot9.lm8 <- lm( F8.TRNG.SENSITIVITY ~ BIRTHSEX + TRAINING_LEVEL + RACE2 , data = FACTANAL.VARIMAX9 )
frot9.lm6h <- lm( F6.RESPECT.SRSTAFF ~ HTCAT + TRAINING_LEVEL + RACE2 , data = FACTANAL.VARIMAX9 )

tab_model( frot9.lm5,
           frot9.lm6,
           frot9.lm7,
           frot9.lm8,
           frot9.lm6h,
           show.ci=F, show.se=T)
  F5.PAIN F6.RESPECT.SRSTAFF F7.APRONS.FIT.AVAIL F8.TRNG.SENSITIVITY F6.RESPECT.SRSTAFF
Predictors Estimates std. Error p Estimates std. Error p Estimates std. Error p Estimates std. Error p Estimates std. Error p
(Intercept) -0.02 0.14 0.892 -0.06 0.14 0.671 -0.31 0.13 0.016 -0.21 0.14 0.125 -0.14 0.14 0.301
BIRTHSEX [M] -0.22 0.11 0.058 0.27 0.11 0.017 0.42 0.10 <0.001 0.16 0.11 0.163
TRAINING LEVEL [First
Year]
-0.05 0.14 0.723 0.01 0.13 0.936 0.06 0.12 0.626 -0.04 0.13 0.746 -0.01 0.13 0.952
TRAINING LEVEL [Second
Year]
0.09 0.14 0.513 -0.07 0.13 0.600 -0.04 0.12 0.730 0.01 0.13 0.917 -0.03 0.13 0.822
RACE2 [NON-WHITE] 0.22 0.11 0.060 -0.11 0.11 0.306 0.14 0.10 0.181 0.24 0.11 0.028 -0.11 0.11 0.318
HTCAT [1] 0.36 0.11 0.001
Observations 225 225 225 225 225
R2 / R2 adjusted 0.046 / 0.028 0.040 / 0.023 0.075 / 0.058 0.026 / 0.008 0.061 / 0.044



  • Conclusion: Rotated factor solutions do not load RESPECT as highly as the unrotated solution we initially selected. Furthermore, rotated solutions with more than five factors seem to load respect-oriented elements highly onto two separate factors that can loosely be seen as RESPECT-JUNIOR STAFF (nurses, techs) and RESPECT-SR STAFF (GI Attending, Anesthesiologist), with only the latter having statistical significance when regressed onto BIRTHSEX in a linear model.

  • Four of our factors – F5.Pain, F6.Respect.SrStaff, F7.Aprons.Fit.Avail, F8.Training.Sensitivity – all showed gender difference, and with the exception of F5.Pain, BIRTHSEX maintained its significance in linear regression models after controlling for Training Level (ref level = “Advanced”) and Race (binary, ref=White vs. Non-White). The TRAINING_LEVEL variable seemed to knock the F5.Pain factor out of significance, but only barely (p=0.0610). The HTCAT variable also showed statistical significance in our fully-adjusted model with outcome factor F6.RESEPECT.SRSTAFF, which is similar to what we’ve seen in other analyses here: specifically, that height and sex seem to behave almost identically in various models where RESPECT is the outcome of interest. And in many cases, height produces a stronger model, based on the R2 score.




Question 5A: Factor Analysis Revisited: OBLIQUE (PROMAX) ROTATIONS

  • The factor analysis solutions above were based on orthogonal rotations, ones where the resulting factors were uncorrelated

  • The following factor solutions are based on oblique rotations, ones that allow the factors to retain any correlation they may exhibit among each other

  • Oblique rotations (e.g., olbimin or Promax), allow the factors themselves to be regressed onto each other to explore answers to certain research questions that are not driven solely by categorical covariates such as SEX, HEIGHT, GLOVE or RACE

  • Various factoring methods were tested - Principal Axis (PA), Minimum Residual (MINRES), Weighted Least Squares (WLS), and Maximum Likelihood Estimation (MLE) - with the last (MLE) being the method used in both these solutions

  • For the sake of brevity, the results of only two nine-factor Promax solutions are displayed below. Each has passed certain key model fit and consistency/reliability tests:

    • Each solution has an overall KMO greater than 0.67
    • Each solution explains at least 42% of the overall variance
    • Each solution has a Tucker-Lewis Index (TMI) greater than 0.92 and a Root Mean Standard Error Approximation (RMSEA) less than 0.3
    • Each solution was evaluated for overall consistency/reliability using Cronbach’s Alpha, Greatest Lower Bound, and Omega; and each scored greater than 0.70 on each of these tests
    • The individual factors themselves for each solution were evaluated for consistency/reliability, and none scored lower than 0.60 on any of the three aforementioned tests (both solutions produced eight factors that scored above the 0.70, but each solution produced only one factor that was between 0.60 and 0.69)



Promax Solution 1: 9-Factors (41 of 54 variables included in final factor load)



MLE Factor Analysis: Promax Rotation with 9 factors produced from 41 questions

Correlation Matrix for Promax-9-DialExt


  • What makes this solution interesting is that it highlights just how much of what transpires in the Endoscopy Suite hinges on the factor tentatively named Respect-EStaff (Respect Endoscopy Suite Staff).
  • Key Message: When there is Respect in the workplace, there is also training with sensitivity, nurses who respond on the first ask, heightened trainee confidence, and tendency to use lightweight, more comfortable aprons and dial extenders – all correlating with a decrease in reports of transient pain and injury
  • By expanding to nine factors, not only did we achieve optimal TMI and RMSEA values (i.e., better model fit), we saw a schism in two factors from one of our Varimax-rotated solutions: “Apron Fit & Availability” produced two distinct, reliable factors related to “Standard Aprons” and “Lightweight Aprons”; and “Respect Senior Staff” spun off a second factor more aptly described as “Trainee Confidence.”



Regressions using BIRTHSEX and select factors to predict likelihood for Pain/Injury


Regressions: BIRTHSEX predicting PAIN/INJURY


  • The regression summary above charts what happens to the coefficient of the variable BIRTHSEX when it is used to predict the factor PAIN. Unadjusted, the coefficient starts at 0.41. The next six models show what happens if only one intervention is made: For example, adding a standard apron increases the coefficient to 0.43 (i.e., increases the likelihood of PAIN). But the next single-step interventions (red underscores) - using a lightweight apron, improving trainee confidence, treating trainees with respect, having nurses be more cooperative, and training with sensitivity and appropriate feedback - each reduces the coefficient to as low as 0.34. In the next models (blue underscores), we see what happens when we (1) include all the aforementioned interventions (coefficient drops to 0.30); (2) include the Tactile Training factor, which is positively correlated with the Pain factor (coefficient drops further to 0.27); and (3) adjust further for HEIGHT (which is so correlated with BIRTHSEX that it must be included to confirm a true sex effect), and Training Level. The final model, which includes all these covariates, has a coefficient of 0.31 and our variable of interest, BIRTHSEX, has maintained its significance throughout (whereas HEIGHT and TRAINING_LEVEL did not, according to the Type 3 Anova table, not shown).
  • Final Takeaway: Training with Sensitivity + Respect from Sr. ES Staff + Cooperative Nurses + Lightweight Aprons signficiantly reduce the likelihood of Pain relative to Sex, even after adjusting for Height and Training Level.



Promax Solution 2: 9-Factors (43 of 54 variables included in final factor load)



MLE Factor Analysis: Promax Rotation with 9 factors produced from 43 questions



Correlation Matrix for Promax-9-CodeSwitch


  • This solution is very similar to the one above, except that it includes two questions that were originally ignored due to their perceived limited scope (i.e., questions regarding whether trainees were more or less likely to use Dial Extenders in front of a male or female instructor, as 70% of trainees reported not even using Dial Extenders). With the addition of these two new questions, and by holiding the number of factors requested constant at nine factors, we noticed that the “Respect” and “Trainee Confidence” factors were “reunited” to form one solid, reliable factor (identical to what was observed in the previously discussed Varimax-rotated solutions). It should be noted that the scoring of these questions included all subjects, not just those reporting having used Dial Extenders – so every respondent contributed in some way to the factor loadings and scores involving these new two questions.
  • By including these two questions, we may have stumbled upon an important factor that can be best described as "Code Switching." While the term is typically used in the context of cultural linguistics, “Code Switching” here is meant to imply that some trainees may act one way in front of one audience (i.e., “their people”) and another way in front of perceived figures of authority (or simply in front of instructors who are different from them).
  • Another way of looking at this: Code Switchers are “people pleasers” who are not inclined to “rock the boat.” And according to the correlation matrix above, they do not appear to command respect from ES Staff, do not perceive nurses as cooperative, are not vocal about ill-fitting lightweight or standard issue aprons, do not feel that they are trained with sensitivity/adequate feedback (possibly because they do not insist on it), and as a result, they have high correlation with reported incidences of transient pain and injury.



pairs.panels( codeswitch, pch=21, stars=T )



  • The factor F4.CODE.SWITCH is highly correlated with SEX (females), and HEIGHT (shorter trainees), and less correlated with TRAINING_LEVEL (where, it seems to be more positively associated with First-Year & Second-Year trainees – “Advanced” is the reference category, and as such, its bar appears first in the histogram). It is not correlated at all with RACE, however (where RACE is a binary variable, Whites vs. Non-Whites – it is higher among Non-Whites, but not statistically).



cs <-
rbind( 
  
      summary(contrast(emmeans(glm( F4.CODE.SWITCH ~ BIRTHSEX , data=TEST, family=binomial(link='logit')), spec='BIRTHSEX', reverse=T),
                       list('Unadj OR: Females vs Males CODE.SWITCH=Y' = c( -1, 1))), infer=T) %>%
        tbl_df() %>% mutate (OR.EXP = exp(estimate), LCL.EXP = exp(estimate - 1.96*SE), UCL.EXP = exp(estimate + 1.96*SE)) ,

            # regrid(contrast(emmeans(glm( F4.CODE.SWITCH ~ BIRTHSEX , data=TEST, family=binomial(link='logit')), spec='BIRTHSEX', reverse=T),
            #           list('Unadj OR: Females vs Males CODE.SWITCH=Y' = c( -1, 1)))) %>% tbl_df() %>% 
            #           mutate( OR.EXP = odds.ratio, LCL.EXP = OR.EXP-1.96*SE, UCL.EXP = OR.EXP+1.96*SE),
      

      summary(contrast(emmeans(glm( F4.CODE.SWITCH ~ HEIGHT2 , data=TEST, family=binomial(link='logit')), spec='HEIGHT2', reverse=T),
                       list("Unadj OR: 5'-5'6 vs 5'7-6'4 CODE.SWITCH=Y" = c( 0, 1/2, 1/2, -1/3 , -1/3, -1/3, 0))), infer=T) %>%
        tbl_df() %>% mutate (OR.EXP = exp(estimate), LCL.EXP = exp(estimate - 1.96*SE), UCL.EXP = exp(estimate + 1.96*SE)) ,
      
            # regrid(contrast(emmeans(glm( F4.CODE.SWITCH ~ HEIGHT2 , data=TEST, family=binomial(link='logit')), spec='HEIGHT2', reverse=T),
            #         list("Unadj OR: 5'-5'6 vs 5'7-6'4 CODE.SWITCH=Y" = c( 0, 1/2, 1/2, -1/3 , -1/3, -1/3, 0)))) %>% tbl_df() %>% 
            #           mutate( OR.EXP = odds.ratio, LCL.EXP = OR.EXP-1.96*SE, UCL.EXP = OR.EXP+1.96*SE),
  
      
      
      summary(contrast(emmeans(glm( F4.CODE.SWITCH ~ TRAINING_LEVEL , data=TEST, family=binomial(link='logit')), spec='TRAINING_LEVEL', reverse=T),
                       list('Unadj OR: First & Second Yr vs Advanced CODE.SWITCH=Y' = c( -1, 1/2, 1/2))), infer=T) %>%
        tbl_df() %>% mutate (OR.EXP = exp(estimate), LCL.EXP = exp(estimate - 1.96*SE), UCL.EXP = exp(estimate + 1.96*SE)) ,

            # regrid(contrast(emmeans(glm( F4.CODE.SWITCH ~ TRAINING_LEVEL , data=TEST, family=binomial(link='logit')), spec='TRAINING_LEVEL', reverse=T),
            #         list('Unadj OR: First & Second Yr vs Advanced CODE.SWITCH=Y' = c( -1, 1/2, 1/2)))) %>% tbl_df() %>% 
            #           mutate( OR.EXP = odds.ratio, LCL.EXP = OR.EXP-1.96*SE, UCL.EXP = OR.EXP+1.96*SE),
                   
      
      summary(contrast(emmeans(glm( F4.CODE.SWITCH ~ RACE2 , data=TEST, family=binomial(link='logit')), spec='RACE2', reverse=T),
                       list('Unadj OR: Non-Whites vs Whites CODE.SWITCH=Y' = c( -1, 1))), infer=T) %>%
        tbl_df() %>% mutate (OR.EXP = exp(estimate), LCL.EXP = exp(estimate - 1.96*SE), UCL.EXP = exp(estimate + 1.96*SE)) ,

            # regrid(contrast(emmeans(glm( F4.CODE.SWITCH ~ RACE2 , data=TEST, family=binomial(link='logit')), spec='RACE2', reverse=T),
            #         list('Unadj OR: Non-Whites vs Whites CODE.SWITCH=Y' = c( -1, 1)))) %>% tbl_df() %>% 
            #           mutate( OR.EXP = odds.ratio, LCL.EXP = OR.EXP-1.96*SE, UCL.EXP = OR.EXP+1.96*SE),

    summary(contrast(emmeans(glm( F4.CODE.SWITCH ~ F1.TRNG.FEEDBACK.SENSITIVITY , data=TEST, family=binomial(link='logit')), spec='F1.TRNG.FEEDBACK.SENSITIVITY', reverse=T),
                     list('Unadj OR: TRNG.FBACK.SENS= N &  CODE.SWITCH=Y' = c( 1, -1))), infer=T) %>%
        tbl_df() %>% mutate (OR.EXP = exp(estimate), LCL.EXP = exp(estimate - 1.96*SE), UCL.EXP = exp(estimate + 1.96*SE)) ,

            # regrid(contrast(emmeans(glm( F4.CODE.SWITCH ~ F1.TRNG.FEEDBACK.SENSITIVITY , data=TEST, family=binomial(link='logit')), spec='F1.TRNG.FEEDBACK.SENSITIVITY', reverse=T),
            #          list('Unadj OR: TRNG.FBACK.SENS= N &  CODE.SWITCH=Y' = c( 1, -1)))) %>% tbl_df() %>% 
            #           mutate( OR.EXP = odds.ratio, LCL.EXP = OR.EXP-1.96*SE, UCL.EXP = OR.EXP+1.96*SE),
    
    
    summary(contrast(emmeans(glm( F4.CODE.SWITCH ~ F2.RESPECT.CONFIDENCE , data=TEST, family=binomial(link='logit')), spec='F2.RESPECT.CONFIDENCE', reverse=T),
                     list('Unadj OR: RESPECT.CONFIDENCE= N &  CODE.SWITCH=Y' = c( 1, -1))), infer=T) %>%
        tbl_df() %>% mutate (OR.EXP = exp(estimate), LCL.EXP = exp(estimate - 1.96*SE), UCL.EXP = exp(estimate + 1.96*SE)) ,

            # regrid(contrast(emmeans(glm( F4.CODE.SWITCH ~ F2.RESPECT.CONFIDENCE , data=TEST, family=binomial(link='logit')), spec='F2.RESPECT.CONFIDENCE', reverse=T),
            #          list('Unadj OR: RESPECT.CONFIDENCE= N &  CODE.SWITCH=Y' = c( 1, -1)))) %>% tbl_df() %>% 
            #           mutate( OR.EXP = odds.ratio, LCL.EXP = OR.EXP-1.96*SE, UCL.EXP = OR.EXP+1.96*SE),
    
    
    
    summary(contrast(emmeans(glm( F4.CODE.SWITCH ~ F3.NURSE.COOP , data=TEST, family=binomial(link='logit')), spec='F3.NURSE.COOP', reverse=T),
                     list('Unadj OR: NURSE.COOP= N &  CODE.SWITCH=Y' = c( 1, -1))), infer=T) %>%
        tbl_df() %>% mutate (OR.EXP = exp(estimate), LCL.EXP = exp(estimate - 1.96*SE), UCL.EXP = exp(estimate + 1.96*SE)) ,

            # regrid(contrast(emmeans(glm( F4.CODE.SWITCH ~ F3.NURSE.COOP , data=TEST, family=binomial(link='logit')), spec='F3.NURSE.COOP', reverse=T),
            #          list('Unadj OR: NURSE.COOP= N &  CODE.SWITCH=Y' = c( 1, -1)))) %>% tbl_df() %>% 
            #           mutate( OR.EXP = odds.ratio, LCL.EXP = OR.EXP-1.96*SE, UCL.EXP = OR.EXP+1.96*SE),
    
    summary(contrast(emmeans(glm( F4.CODE.SWITCH ~ F9.PAIN , data=TEST, family=binomial(link='logit')), spec='F9.PAIN', reverse=T),
                     list('Unadj OR: TRANSIENT.PAIN= Y &  CODE.SWITCH=Y' = c( -1, 1))), infer=T) %>%
        tbl_df() %>% mutate (OR.EXP = exp(estimate), LCL.EXP = exp(estimate - 1.96*SE), UCL.EXP = exp(estimate + 1.96*SE)),

            # regrid(contrast(emmeans(glm( F4.CODE.SWITCH ~ F9.PAIN , data=TEST, family=binomial(link='logit')), spec='F9.PAIN', reverse=T),
            #           list('Unadj OR: TRANSIENT.PAIN= Y &  CODE.SWITCH=Y' = c( -1, 1)))) %>% tbl_df() %>% 
            #           mutate( OR.EXP = odds.ratio, LCL.EXP = OR.EXP-1.96*SE, UCL.EXP = OR.EXP+1.96*SE)     )

    summary(contrast(emmeans(glm( F4.CODE.SWITCH ~ F8.DIALEXT , data=TEST, family=binomial(link='logit')), spec='F8.DIALEXT', reverse=T),
                     list('Unadj OR: DIALEXT= Y &  CODE.SWITCH=Y' = c( -1, 1))), infer=T) %>%
        tbl_df() %>% mutate (OR.EXP = exp(estimate), LCL.EXP = exp(estimate - 1.96*SE), UCL.EXP = exp(estimate + 1.96*SE)) )


cs$metric = c('Sex= F', "Height < 5'7", 'Training < Advanced', 'Race= Non-White', "Trained w/Sensitivity = N",
              "Respected & Confident = N", "Nurses Cooperative = N", "Transient Pain = Y", "Uses Dial Extender = Y")


#print(cs)

# Plot

cs %>% arrange( (OR.EXP)) %>% 
  mutate ( OR.EXP = round(OR.EXP,2),
           yAxis = row_number(), 
           signif = case_when( LCL.EXP < 1 ~ 'NS',
                               p.value < 0.001 ~ "***",
                               p.value < 0.01 ~ "**",
                               p.value < 0.05 ~ "*",
                               p.value < 0.10 ~ "~",
                               TRUE ~ 'NS')) %>% 
  relocate(yAxis, .before=  contrast) %>% 
  relocate(signif, .after= p.value) %>% 
ggplot( ., aes(x = OR.EXP, y = factor(metric, level = metric)) ) +
  
  geom_vline(aes(xintercept = 1), size = 1, linetype = "solid", color= "darkred") +
  geom_errorbarh(aes(xmax = UCL.EXP, xmin = LCL.EXP), size = 2, height = .1, color = "darkgrey", alpha=0.3) +
  geom_point(size = 4, color = "dodgerblue", alpha=0.5) +
  theme_538() +
  theme(panel.grid.minor = element_blank()) +
  scale_y_discrete (breaks = cs$metric, labels = cs$metric ) +
  scale_x_continuous(breaks = seq(-1,13,1) ) + 
  geom_text(data = ~., aes(label = paste(round(OR.EXP,2), signif, sep= " "), 
                          y = metric, 
                          x= OR.EXP+0.15), size=4, color="dodgerblue", fontface="bold.italic", nudge_y= 0.35, nudge_x= 0.75)+
  
 ylab("")+
  xlab("Odds Ratio")+
  ggtitle("Likely Characteristics of Code Switchers")+
  theme(plot.title =   element_text(color = "black", size = 12, face = "bold", hjust= 0.5))+
  theme(plot.subtitle= element_text(color = "black", size = 10, face = "bold", hjust= 0.5))



  • Since factors produced by EFA always have a mean of zero, it is simple to convert them to binary variables (where negative scores=0=No and positive scores=1=Yes), making running logistic regressions on these factors easy

  • If we were to use the resulting odds ratios to inform the profile of these so-called “Code Switchers,” we might find that they are . . . .

    • 5.21 times more likely to be under 5’ 7”
    • 4.76 times more likely to be Female
    • 2.71 times more likely to be either a First- or Second-Year trainee
    • 6.41 times more likely to be unconfident or feel disrespected by ES Staff
    • 3.16 times more likely to feel that they were not trained with sensitivity or with adequate feedback
    • 1.93 times more likely to view nurses as uncooperative
    • 4.00 times more likely to report incidences of transient pain or outright injury
    • 1.83 times more likley to use Dial Extenders
    • 1.28 times more likely to be Non-White (insignificant results despite significant p-value - confint includes 1)



- (In the above logistic regression calculations the F4.CODE.SWITCH factor is the response variable (Factor 4 = 1, or “Yes”), and odds ratios are computed relative to a corresponding reference category for each test)



Regressions: BIRTHSEX predicting PAIN/INJURY


  • The regression summary is similar to the one used in the previous 9-factor PROMAX solution, where we try to confirm a SEX-PAIN association. But in addition to controlling for HEIGHT, we now must contend with this new Code Switch factor, which is correlated with both of these covariates as well as with the outcome (PAIN).
  • The first three models establish a baseline coefficient for each of these three variables: BIRTHEX = 0.43, HEIGHTBAND = 0.40 and F4.CODE.SWITCH = 0.30.
  • The next two models adjust BIRTHSEX and HEIGHTBAND for F4.CODE.SWITCH, and in each case, while all variables remain statistically significant, the p-values for BIRTHSEX and HEIGHTBAND rose, while that of the Code Swich factor remained very low (p < 0.001). In the next model, where all three variables have PAIN regressed on to them, only F4.CODE.SWITCH remains statistically significant (p < 0.001), suggesting that in this three-way battle, it is the Code Switch factor that is strongest predictor of transient pain/injury.
  • Similar to what was done previously, the next models (blue underscore) add single interventions (lighweight Aprons, Respect, Cooperative Nurses, etc.), each of which drops the coefficient for F4.CODE.SWITCH from its 0.30 high to as low as 0.20 (and this is with BIRTHSEX and HEIGHTBAND also included in the model).
  • In the two “fully loaded” models – one with Tactile Training and one without – the effect of F4.CODESWITCH remains statistically significant, and its coefficient drops as low as 0.13


  • NOTE:
    The variable HEIGHTBAND (a binvary variable, where 0=Short and 1=Tall) was used in the above regressions for the sake of simplicity. (Having only two levels, HEIGHTBAND makes tracking the effects of height easier.) But if the original multilevel categorical variable HEIGHT2 was used in these models instead – similar to what was done with the previous Promax-9 solution – the results would’ve have been virtually identical: F4.CODE.SWITCH would have knocked both BIRTHSEX and HEIGHT2 out of significance in the ‘fully loaded’ model (where adjustments were made for Aprons, Nurse Cooperation, Training with Sensitivity, Respect/Confidence, Tactile Training, and Training Level). But if F4.CODE.SWITCH is removed from the ‘fully-loaded’ model, then just as before, BIRTHSEX prevails over its highly correlated covariate HEIGHT2, retaining its significance even in the presence of all the aforementioned covariates (“interventions”), with its coefficient falling from 0.43 to 0.28, p= 0.048. While not shown, the multilevel variable HEIGHT2 is not significant in the final model, according to the Type 3 Anova table.



Regressions: BIRTHSEX predicting PAIN/INJURY with HEIGHT2, as captured in the survey